From: Henry Cook Date: Fri, 7 Nov 2014 01:24:39 +0000 (-0800) Subject: Clean up canonical mt benchmarks and reorganize extra versions in /mt. All versions... X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=d537de7deffa6036dab573ff174b7f8c8e470437;p=riscv-tests.git Clean up canonical mt benchmarks and reorganize extra versions in /mt. All versions support at least 1/2/4 threads. --- diff --git a/benchmarks/Makefile b/benchmarks/Makefile index f8db5b9..3346c7c 100644 --- a/benchmarks/Makefile +++ b/benchmarks/Makefile @@ -26,8 +26,8 @@ bmarks = \ dhrystone \ spmv \ mt-vvadd \ + mt-matmul \ #vec-fft \ - #mt-matmul \ #vec-vvadd \ #vec-cmplxmult \ #vec-matmul \ @@ -38,7 +38,6 @@ bmarks_host = \ towers \ vvadd \ multiply \ - mm \ spmv \ vec-vvadd \ vec-cmplxmult \ diff --git a/benchmarks/common/util.h b/benchmarks/common/util.h index 6c4f963..638f024 100644 --- a/benchmarks/common/util.h +++ b/benchmarks/common/util.h @@ -113,4 +113,15 @@ static void __attribute__((noinline)) barrier(int ncores) #include "encoding.h" #endif +#define stringify_1(s) #s +#define stringify(s) stringify_1(s) +#define stats(code, iter) do { \ + unsigned long _c = -rdcycle(), _i = -rdinstret(); \ + code; \ + _c += rdcycle(), _i += rdinstret(); \ + if (cid == 0) \ + printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ + stringify(code), _c, _c/iter, 10*_c/iter%10, _c/_i, 10*_c/_i%10); \ + } while(0) + #endif //__UTIL_H diff --git a/benchmarks/mt-matmul/bmark.mk b/benchmarks/mt-matmul/bmark.mk index 4b7fcb7..6a7140f 100644 --- a/benchmarks/mt-matmul/bmark.mk +++ b/benchmarks/mt-matmul/bmark.mk @@ -10,6 +10,7 @@ mt_matmul_c_src = \ mt-matmul.c \ + matmul.c \ syscalls.c \ mt_matmul_riscv_src = \ diff --git a/benchmarks/mt-matmul/dataset.h b/benchmarks/mt-matmul/dataset.h index dde3ee4..2c59a33 100644 --- a/benchmarks/mt-matmul/dataset.h +++ b/benchmarks/mt-matmul/dataset.h @@ -1,9 +1,12 @@ +#ifndef __DATASET_H +#define __DATASET_H #define ARRAY_SIZE 1024 - #define 
DIM_SIZE 32 +typedef double data_t; + static data_t input1_data[ARRAY_SIZE] = { 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, @@ -172,3 +175,5 @@ static data_t verify_data[ARRAY_SIZE] = 69, 73, 94, 89 }; + +#endif //__DATASET_H diff --git a/benchmarks/mt-matmul/matmul.c b/benchmarks/mt-matmul/matmul.c new file mode 100644 index 0000000..95fbe03 --- /dev/null +++ b/benchmarks/mt-matmul/matmul.c @@ -0,0 +1,20 @@ +#include "dataset.h" + +//-------------------------------------------------------------------------- +// single-thread, naive version +// +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + int i, j, k; + + for ( i = 0; i < lda; i++ ) + { + for ( j = 0; j < lda; j++ ) + { + for ( k = coreid; k < lda; k+=ncores ) + { + C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; + } + } + } +} diff --git a/benchmarks/mt-matmul/mt-matmul.c b/benchmarks/mt-matmul/mt-matmul.c index 1584a5d..2353962 100644 --- a/benchmarks/mt-matmul/mt-matmul.c +++ b/benchmarks/mt-matmul/mt-matmul.c @@ -25,62 +25,20 @@ //-------------------------------------------------------------------------- // Input/Reference Data -typedef double data_t; #include "dataset.h" - + //-------------------------------------------------------------------------- // Basic Utilities and Multi-thread Support -__thread unsigned long coreid; - #include "util.h" + -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - //-------------------------------------------------------------------------- // matmul function -// single-thread, naive version -void 
__attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - + extern void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ); -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - -} //-------------------------------------------------------------------------- // Main @@ -90,46 +48,16 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da void thread_entry(int cid, int nc) { - coreid = cid; - - // static allocates data in the binary, which is visible to both threads static data_t results_data[ARRAY_SIZE]; - - // Execute the provided, naive matmul - barrier(nc); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); + stats(matmul(cid, nc, DIM_SIZE, input1_data, input2_data, results_data); barrier(nc), DIM_SIZE/DIM_SIZE/DIM_SIZE); - - // verify int res = verifyDouble(ARRAY_SIZE, results_data, verify_data); - if (res) - exit(res); - -#if 0 - // clear results from the first trial - size_t i; - if (coreid == 0) - for (i=0; i < ARRAY_SIZE; i++) - results_data[i] = 0; - barrier(nc); - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - #ifdef DEBUG printArray("results:", ARRAY_SIZE, results_data); printArray("verify :", ARRAY_SIZE, verify_data); #endif - - // verify - res = verify(ARRAY_SIZE, results_data, verify_data); - if (res) - exit(res); - barrier(nc); -#endif - 
exit(0); + exit(res); } diff --git a/benchmarks/mt-vvadd/bmark.mk b/benchmarks/mt-vvadd/bmark.mk index 72b2d34..ff969c1 100644 --- a/benchmarks/mt-vvadd/bmark.mk +++ b/benchmarks/mt-vvadd/bmark.mk @@ -10,6 +10,7 @@ mt_vvadd_c_src = \ mt-vvadd.c \ + vvadd.c \ syscalls.c \ mt_vvadd_riscv_src = \ diff --git a/benchmarks/mt-vvadd/dataset.h b/benchmarks/mt-vvadd/dataset.h index ce9f936..51f25df 100644 --- a/benchmarks/mt-vvadd/dataset.h +++ b/benchmarks/mt-vvadd/dataset.h @@ -1,6 +1,10 @@ +#ifndef __DATASET_H +#define __DATASET_H #define DATA_SIZE 1000 +typedef double data_t; + static data_t input1_data[DATA_SIZE] = { 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, @@ -163,3 +167,5 @@ static data_t verify_data[DATA_SIZE] = 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 }; + +#endif //__DATASET_H diff --git a/benchmarks/mt-vvadd/mt-vvadd.c b/benchmarks/mt-vvadd/mt-vvadd.c index 2116115..48eae6a 100644 --- a/benchmarks/mt-vvadd/mt-vvadd.c +++ b/benchmarks/mt-vvadd/mt-vvadd.c @@ -24,49 +24,20 @@ //-------------------------------------------------------------------------- // Input/Reference Data -typedef double data_t; #include "dataset.h" //-------------------------------------------------------------------------- // Basic Utilities and Multi-thread Support -__thread unsigned long coreid; - #include "util.h" -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) //-------------------------------------------------------------------------- // vvadd function -//perform in-place vvadd -void 
__attribute__((noinline)) vvadd(int ncores, size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} +extern void __attribute__((noinline)) vvadd(int coreid, int ncores, size_t n, const data_t* x, const data_t* y, data_t* z); -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // -} //-------------------------------------------------------------------------- // Main @@ -76,57 +47,41 @@ void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const void thread_entry(int cid, int nc) { - coreid = cid; - // static allocates data in the binary, which is visible to both threads static data_t results_data[DATA_SIZE]; - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd + // First do out-of-place vvadd barrier(nc); - stats(vvadd(nc, DATA_SIZE, results_data, input2_data); barrier(nc)); + stats(vvadd(cid, nc, DATA_SIZE, input1_data, input2_data, results_data); barrier(nc), DATA_SIZE); - - // verify - int res = verifyDouble(DATA_SIZE, results_data, verify_data); - if (res) - exit(res); - -#if 0 - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; + if(cid == 0) { +//#ifdef DEBUG + printDoubleArray("out-of-place results: ", DATA_SIZE, results_data); + printDoubleArray("out-of-place verify : ", DATA_SIZE, verify_data); +//#endif + int res = verifyDouble(DATA_SIZE, results_data, verify_data); + if(res) exit(res); } - barrier(nc); - // 
Execute your faster vvadd + // Second do in-place vvadd + // Copying input + size_t i; + if(cid == 0) { + for (i = 0; i < DATA_SIZE; i++) + results_data[i] = input1_data[i]; + } barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - + stats(vvadd(cid, nc, DATA_SIZE, results_data, input2_data, results_data); barrier(nc), DATA_SIZE); + + if(cid == 0) { #ifdef DEBUG - printDoubleArray("results: ", DATA_SIZE, results_data); - printDoubleArray("verify : ", DATA_SIZE, verify_data); + printDoubleArray("in-place results: ", DATA_SIZE, results_data); + printDoubleArray("in-place verify : ", DATA_SIZE, verify_data); #endif + int res = verifyDouble(DATA_SIZE, results_data, verify_data); + if(res) exit(res); + } - // verify - res = verifyDouble(DATA_SIZE, results_data, verify_data); - if (res) - exit(res); barrier(nc); -#endif - exit(0); } diff --git a/benchmarks/mt-vvadd/vvadd.c b/benchmarks/mt-vvadd/vvadd.c new file mode 100644 index 0000000..8f4d43f --- /dev/null +++ b/benchmarks/mt-vvadd/vvadd.c @@ -0,0 +1,16 @@ +#include "stdlib.h" +#include "dataset.h" + +//-------------------------------------------------------------------------- +// vvadd function + +void __attribute__((noinline)) vvadd(int coreid, int ncores, size_t n, const data_t* x, const data_t* y, data_t* z) +{ + size_t i; + + // interleave accesses + for (i = coreid; i < n; i+=ncores) + { + z[i] = x[i] + y[i]; + } +} diff --git a/mt/Makefile b/mt/Makefile index 1d85ed3..16240b0 100755 --- a/mt/Makefile +++ b/mt/Makefile @@ -1,4 +1,4 @@ -#======================================================================= +#/======================================================================= # UCB VLSI FLOW: Makefile for riscv-bmarks/mt #----------------------------------------------------------------------- # Henry Cook (hcook@cs.berkeley.edu) @@ -16,73 +16,60 @@ instbasedir = $(UCB_VLSI_HOME)/install # Sources #-------------------------------------------------------------------- -bmarks = 
\ -ab_matmul\ -ab_vvadd\ +bmarks_matmul = \ ad_matmul\ -ad_vvadd\ ae_matmul\ -ae_vvadd\ af_matmul\ -af_vvadd\ ag_matmul\ -ag_vvadd\ ai_matmul\ -ai_vvadd\ -aj_vvadd\ ak_matmul\ -ak_vvadd\ al_matmul\ -al_vvadd\ am_matmul\ -am_vvadd\ an_matmul\ ap_matmul\ -ap_vvadd\ aq_matmul\ -aq_vvadd\ ar_matmul\ -ar_vvadd\ -as_matmul\ -as_vvadd\ at_matmul\ -at_vvadd\ av_matmul\ -av_vvadd\ ay_matmul\ -ay_vvadd\ az_matmul\ -az_vvadd\ -ba_matmul\ -ba_vvadd\ bb_matmul\ -bb_vvadd\ bc_matmul\ -bc_vvadd\ -be_matmul\ -be_vvadd\ bf_matmul\ -bf_vvadd\ bh_matmul\ -bh_vvadd\ bj_matmul\ -bj_vvadd\ bk_matmul\ -bk_vvadd\ bm_matmul\ -bm_vvadd\ -bn_matmul\ -bn_vvadd\ bo_matmul\ -bo_vvadd\ -bp_matmul\ -bp_vvadd\ br_matmul\ -br_vvadd\ bs_matmul\ -bs_vvadd\ -bt_matmul\ -bt_vvadd\ +ce_matmul\ +cf_matmul\ +cg_matmul\ +ci_matmul\ +ck_matmul\ +cl_matmul\ +cm_matmul\ +cs_matmul\ +cv_matmul\ +cy_matmul\ +dc_matmul\ +df_matmul\ +dm_matmul\ +do_matmul\ +dr_matmul\ +ds_matmul\ +du_matmul\ +dv_matmul\ + +bmarks_vvadd = \ +vvadd0\ +vvadd1\ +vvadd2\ +vvadd3\ +vvadd4\ + +bmarks = $(bmarks_vvadd) $(bmarks_matmul) #-------------------------------------------------------------------- # Build rules @@ -95,10 +82,9 @@ RISCV_LINK_OPTS = -nostdlib -nostartfiles -ffast-math -lc RISCV_OBJDUMP = riscv-objdump --disassemble-all --disassemble-zeroes --section=.text --section=.text.startup --section=.data RISCV_SIM = spike -p2 -VPATH += $(addprefix $(bmarkdir)/, $(bmarks)) -VPATH += $(common) +VPATH += $(common) $(common)/../mt-matmul $(common)/../mt-vvadd -incs += -I. -I$(bmarkdir)/../env -I$(common) $(addprefix -I$(bmarkdir)/, $(bmarks)) +incs += -I. 
-I$(bmarkdir)/../env -I$(common) -I$(common)/../mt-matmul -I$(common)/../mt-vvadd objs := #include $(patsubst %, $(bmarkdir)/%/bmark.mk, $(bmarks)) @@ -108,10 +94,12 @@ objs := #------------------------------------------------------------ bmarks_riscv_obj = $(addsuffix .o, $(bmarks)) -bmarks_riscv_bin = $(addsuffix .riscv, $(bmarks)) +bmarks_riscv_matmul_bin = $(addsuffix .riscv, $(bmarks_matmul)) +bmarks_riscv_vvadd_bin = $(addsuffix .riscv, $(bmarks_vvadd)) bmarks_riscv_dump = $(addsuffix .riscv.dump, $(bmarks)) bmarks_riscv_hex = $(addsuffix .riscv.hex, $(bmarks)) bmarks_riscv_out = $(addsuffix .riscv.out, $(bmarks)) +bmarks_riscv_bin = $(bmarks_riscv_matmul_bin) $(bmarks_riscv_vvadd_bin) bmarks_defs = -DPREALLOCATE=1 -DHOST_DEBUG=0 bmarks_cycles = 80000 @@ -119,8 +107,11 @@ bmarks_cycles = 80000 %.hex: % elf2hex 16 32768 $< > $@ -$(bmarks_riscv_bin): %.riscv: %.o syscalls.o crt.o - $(RISCV_LINK) $< syscalls.o crt.o $(RISCV_LINK_OPTS) -o $@ +$(bmarks_riscv_vvadd_bin): %.riscv: %.o mt-vvadd.o syscalls.o crt.o + $(RISCV_LINK) $< mt-vvadd.o syscalls.o crt.o $(RISCV_LINK_OPTS) -o $@ + +$(bmarks_riscv_matmul_bin): %.riscv: %.o mt-matmul.o syscalls.o crt.o + $(RISCV_LINK) $< mt-matmul.o syscalls.o crt.o $(RISCV_LINK_OPTS) -o $@ $(bmarks_riscv_dump): %.riscv.dump: %.riscv $(RISCV_OBJDUMP) $< > $@ @@ -140,7 +131,7 @@ riscv: $(bmarks_riscv_dump) $(bmarks_riscv_hex) run-riscv: $(bmarks_riscv_out) echo; perl -ne 'print " [$$1] $$ARGV \t$$2\n" if /\*{3}(.{8})\*{3}(.*)/' \ -junk += $(bmarks_riscv_bin) $(bmarks_riscv_dump) $(bmarks_riscv_hex) $(bmarks_riscv_out) +junk += $(bmarks_riscv_bin) $(bmarks_riscv_dump) $(bmarks_riscv_hex) $(bmarks_riscv_out) $(bmarks_riscv_obj) #------------------------------------------------------------ @@ -167,4 +158,4 @@ install-link: # Clean up clean: - rm -rf $(objs) $(junk) + rm -rf $(objs) $(junk) syscall.o crt.o mt-matmul.o mt-vvadd.o diff --git a/mt/ab_matmul/ab_matmul.c b/mt/ab_matmul/ab_matmul.c deleted file mode 100755 index 
6530a5d..0000000 --- a/mt/ab_matmul/ab_matmul.c +++ /dev/null @@ -1,246 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) 
- { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - - // I think I've got a way for this to not need the "shared" state to work nicely, so no MSI version - int i, j, k, lda_over_2; - lda_over_2 = lda/2; - - if(coreid > 1) - return; - // left side of c - if(coreid == 0) - { - // first half of topleft corner - for(i = 0; i < lda_over_2; i++) { - for(j = 0; j < lda_over_2; j++) { - for(k = 0; k < lda_over_2; k++) { - C[i*lda + j] += A[i*lda + k]*B[k*lda + j]; - } - } - } - // second half of topleft corner - for(i = 0; i < lda_over_2; i++) { - for(j = 0; j < lda_over_2; j++) { - for(k = lda_over_2; k < lda; k++) { - C[i*lda + j] += A[i*lda + k]*B[k*lda + j]; - } - } - } - // second half of bottomleft corner - for(i = lda_over_2; i < lda; i++) { - for(j = 0; j < lda_over_2; j++) { - for(k = lda_over_2; k < lda; k++) { - C[i*lda + j] += A[i*lda + k]*B[k*lda + j]; - } - } - } - // first half of bottomleft corner - for(i = lda_over_2; i < lda; i++) { - for(j = 0; j < lda_over_2; j++) { - for(k = 0; k < lda_over_2; k++) { - C[i*lda + j] += A[i*lda + k]*B[k*lda + j]; - } - } - } - } - else // coreid == 1 - { - // first 
half of bottomright corner - for(i = lda_over_2; i < lda; i++) { - for(j = lda_over_2; j < lda; j++) { - for(k = 0; k < lda_over_2; k++) { - C[i*lda + j] += A[i*lda + k]*B[k*lda + j]; - } - } - } - // second half of bottomright corner - for(i = lda_over_2; i < lda; i++) { - for(j = lda_over_2; j < lda; j++) { - for(k = lda_over_2; k < lda; k++) { - C[i*lda + j] += A[i*lda + k]*B[k*lda + j]; - } - } - } - // second half of topright corner - for(i = 0; i < lda_over_2; i++) { - for(j = lda_over_2; j < lda; j++) { - for(k = lda_over_2; k < lda; k++) { - C[i*lda + j] += A[i*lda + k]*B[k*lda + j]; - } - } - } - // first half of topright corner - for(i = 0; i < lda_over_2; i++) { - for(j = lda_over_2; j < lda; j++) { - for(k = 0; k < lda_over_2; k++) { - C[i*lda + j] += A[i*lda + k]*B[k*lda + j]; - } - } - } - } - - return; -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ab_matmul/dataset.h b/mt/ab_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/ab_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 
3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 
0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 
0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 
68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 
61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/ab_matmul/matmul_gendata.pl b/mt/ab_matmul/matmul_gendata.pl deleted file mode 100755 index 
f21bb46..0000000 --- a/mt/ab_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print 
sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/ab_matmul/matmul_mi.c b/mt/ab_matmul/matmul_mi.c deleted file mode 100755 index 6530a5d..0000000 --- a/mt/ab_matmul/matmul_mi.c +++ /dev/null @@ -1,246 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for 
( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. 
- - // I think I've got a way for this to not need the "shared" state to work nicely, so no MSI version - int i, j, k, lda_over_2; - lda_over_2 = lda/2; - - if(coreid > 1) - return; - // left side of c - if(coreid == 0) - { - // first half of topleft corner - for(i = 0; i < lda_over_2; i++) { - for(j = 0; j < lda_over_2; j++) { - for(k = 0; k < lda_over_2; k++) { - C[i*lda + j] += A[i*lda + k]*B[k*lda + j]; - } - } - } - // second half of topleft corner - for(i = 0; i < lda_over_2; i++) { - for(j = 0; j < lda_over_2; j++) { - for(k = lda_over_2; k < lda; k++) { - C[i*lda + j] += A[i*lda + k]*B[k*lda + j]; - } - } - } - // second half of bottomleft corner - for(i = lda_over_2; i < lda; i++) { - for(j = 0; j < lda_over_2; j++) { - for(k = lda_over_2; k < lda; k++) { - C[i*lda + j] += A[i*lda + k]*B[k*lda + j]; - } - } - } - // first half of bottomleft corner - for(i = lda_over_2; i < lda; i++) { - for(j = 0; j < lda_over_2; j++) { - for(k = 0; k < lda_over_2; k++) { - C[i*lda + j] += A[i*lda + k]*B[k*lda + j]; - } - } - } - } - else // coreid == 1 - { - // first half of bottomright corner - for(i = lda_over_2; i < lda; i++) { - for(j = lda_over_2; j < lda; j++) { - for(k = 0; k < lda_over_2; k++) { - C[i*lda + j] += A[i*lda + k]*B[k*lda + j]; - } - } - } - // second half of bottomright corner - for(i = lda_over_2; i < lda; i++) { - for(j = lda_over_2; j < lda; j++) { - for(k = lda_over_2; k < lda; k++) { - C[i*lda + j] += A[i*lda + k]*B[k*lda + j]; - } - } - } - // second half of topright corner - for(i = 0; i < lda_over_2; i++) { - for(j = lda_over_2; j < lda; j++) { - for(k = lda_over_2; k < lda; k++) { - C[i*lda + j] += A[i*lda + k]*B[k*lda + j]; - } - } - } - // first half of topright corner - for(i = 0; i < lda_over_2; i++) { - for(j = lda_over_2; j < lda; j++) { - for(k = 0; k < lda_over_2; k++) { - C[i*lda + j] += A[i*lda + k]*B[k*lda + j]; - } - } - } - } - - return; -} - -//-------------------------------------------------------------------------- -// Main 
-// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ab_vvadd/ab_vvadd.c b/mt/ab_vvadd/ab_vvadd.c deleted file mode 100755 index f2c8a65..0000000 --- a/mt/ab_vvadd/ab_vvadd.c +++ /dev/null @@ -1,172 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // 
***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - - size_t i, j; - j = (coreid+1)*n/ncores; - for (i = coreid*n/ncores; i < j; i++) - { - x[i] = x[i] + y[i]; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ab_vvadd/dataset.h b/mt/ab_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/ab_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 
5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 
1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 
0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 
11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 
2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 
7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 
16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 
15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 
19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 
13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 
6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/ab_vvadd/vvadd_gendata.pl b/mt/ab_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/ab_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print 
"\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." 
\n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/ad_matmul.c b/mt/ad_matmul.c new file mode 100755 index 0000000..60e6e6c --- /dev/null +++ b/mt/ad_matmul.c @@ -0,0 +1,37 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + int i, k; + int j = coreid*(lda/ncores); + int jend = (coreid+1)*(lda/ncores); + for ( ; j < jend; j++ ) + { + int j32 = j << 5; + data_t* Cj32 = C + j32; + for ( k = 0; k < 32; k+=2 ) + { + data_t Aj32k = A[k + j32]; + data_t Aj32k2 = A[k + 1 + j32]; + data_t* Bk32 = B + (k << 5); + data_t* Bk322 = Bk32 + 32; + for ( i = 0; i < 32; i+=4 ) + { + Cj32[i] += Aj32k * Bk32 [i]; + Cj32[i] += Aj32k2 * Bk322 [i]; + Cj32[i+1] += Aj32k * Bk32 [i+1]; + Cj32[i+1] += Aj32k2 * Bk322[i+1]; + Cj32[i+2] += Aj32k * Bk32 [i+2]; + Cj32[i+2] += Aj32k2 * Bk322[i+2]; + Cj32[i+3] += Aj32k * Bk32 [i+3]; + Cj32[i+3] += Aj32k2 * Bk322[i+3]; + } + barrier(ncores); + } + } + + +} diff --git a/mt/ad_matmul/ad_matmul.c b/mt/ad_matmul/ad_matmul.c deleted file mode 100755 index da9aaec..0000000 --- a/mt/ad_matmul/ad_matmul.c +++ /dev/null @@ -1,196 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda 
+ i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - int i, j=0, k, jend=16; - if (coreid != 0) { - j = jend; - jend = jend << 1; - } - for ( ; j < jend; j++ ) - { - int j32 = j << 5; - data_t* Cj32 = C + j32; - for ( k = 0; k < 32; k+=2 ) - { - data_t Aj32k = A[k + j32]; - data_t Aj32k2 = A[k + 1 + j32]; - data_t* Bk32 = B + (k << 5); - data_t* Bk322 = Bk32 + 32; - for ( i = 0; i < 32; i+=4 ) - { - Cj32[i] += Aj32k * Bk32 [i]; - Cj32[i] += Aj32k2 * Bk322 [i]; - Cj32[i+1] += Aj32k * Bk32 [i+1]; - Cj32[i+1] += Aj32k2 * Bk322[i+1]; - Cj32[i+2] += Aj32k * Bk32 [i+2]; - Cj32[i+2] += Aj32k2 * Bk322[i+2]; - Cj32[i+3] += Aj32k * Bk32 [i+3]; - Cj32[i+3] += Aj32k2 * Bk322[i+3]; - } - } - } - - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ad_matmul/dataset.h b/mt/ad_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/ad_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 
3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 
0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 
0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 
68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 
61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/ad_matmul/matmul_gendata.pl b/mt/ad_matmul/matmul_gendata.pl deleted file mode 100755 index 
f21bb46..0000000 --- a/mt/ad_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print 
sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/ad_matmul/matmul_mi.c b/mt/ad_matmul/matmul_mi.c deleted file mode 100755 index da9aaec..0000000 --- a/mt/ad_matmul/matmul_mi.c +++ /dev/null @@ -1,196 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for 
( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - int i, j=0, k, jend=16; - if (coreid != 0) { - j = jend; - jend = jend << 1; - } - for ( ; j < jend; j++ ) - { - int j32 = j << 5; - data_t* Cj32 = C + j32; - for ( k = 0; k < 32; k+=2 ) - { - data_t Aj32k = A[k + j32]; - data_t Aj32k2 = A[k + 1 + j32]; - data_t* Bk32 = B + (k << 5); - data_t* Bk322 = Bk32 + 32; - for ( i = 0; i < 32; i+=4 ) - { - Cj32[i] += Aj32k * Bk32 [i]; - Cj32[i] += Aj32k2 * Bk322 [i]; - Cj32[i+1] += Aj32k * Bk32 [i+1]; - Cj32[i+1] += Aj32k2 * Bk322[i+1]; - Cj32[i+2] += Aj32k * Bk32 [i+2]; - Cj32[i+2] += Aj32k2 * Bk322[i+2]; - Cj32[i+3] += Aj32k * Bk32 [i+3]; - Cj32[i+3] += Aj32k2 * Bk322[i+3]; - } - } - } - - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). 
Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ad_vvadd/ad_vvadd.c b/mt/ad_vvadd/ad_vvadd.c deleted file mode 100755 index 4b77dc5..0000000 --- a/mt/ad_vvadd/ad_vvadd.c +++ /dev/null @@ -1,176 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // 
***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - size_t i; - size_t m = n/2; - if (coreid == 0) { - for (i = 0; i < m; i++) { - x[i] = x[i] + y[i]; - } - } else { - for (i = m; i < n; i++) { - x[i] = x[i] + y[i]; - } - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ad_vvadd/dataset.h b/mt/ad_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/ad_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 
11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 
10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 
6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 
10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 
16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 
15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 
7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 
9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 
20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 
11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 
24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/ad_vvadd/vvadd_gendata.pl b/mt/ad_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/ad_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - 
if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/ae_matmul.c b/mt/ae_matmul.c new file mode 100755 index 0000000..a1f97b2 --- /dev/null +++ b/mt/ae_matmul.c @@ -0,0 +1,110 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + // ***************************** // + // **** ADD YOUR CODE HERE ***** // + // ***************************** // + // + // feel free to make a separate function for MI and MSI versions. 
+ + + + data_t *b1; + data_t *b2; + data_t *b3; + data_t *b4; + data_t c1; + data_t c2; + data_t c3; + data_t c4; + data_t a1; + data_t a2; + data_t a3; + data_t a4; + data_t a5; + data_t a6; + data_t a7; + data_t a8; + int i, j, k; + static data_t BB[1024]; + + + + //transpose B + for ( k = 0; k < lda; k++) { + for ( i = coreid*(lda/ncores); i < (coreid+1)*(lda/ncores); i++ ) { + BB[i*lda + k] = B[k*lda + i]; + } + barrier(ncores); + } + + for ( i = 0; i < lda; i+=4 ) { + for ( j = coreid*(lda/ncores); j < (coreid+1)*(lda/ncores); j++ ) { + c1 = 0; c2 = 0; c3 = 0; c4 = 0; + b1 = &BB[(i+0)*lda]; + b2 = &BB[(i+1)*lda]; + b3 = &BB[(i+2)*lda]; + b4 = &BB[(i+3)*lda]; + for ( k = 0; k < lda; k+=8 ) { + + a1 = A[j*lda + k+0]; + a2 = A[j*lda + k+1]; + a3 = A[j*lda + k+2]; + a4 = A[j*lda + k+3]; + a5 = A[j*lda + k+4]; + a6 = A[j*lda + k+5]; + a7 = A[j*lda + k+6]; + a8 = A[j*lda + k+7]; + + c1 += a1 * b1[k+0]; + c1 += a2 * b1[k+1]; + c1 += a3 * b1[k+2]; + c1 += a4 * b1[k+3]; + c1 += a5 * b1[k+4]; + c1 += a6 * b1[k+5]; + c1 += a7 * b1[k+6]; + c1 += a8 * b1[k+7]; + + c2 += a1 * b2[k+0]; + c2 += a2 * b2[k+1]; + c2 += a3 * b2[k+2]; + c2 += a4 * b2[k+3]; + c2 += a5 * b2[k+4]; + c2 += a6 * b2[k+5]; + c2 += a7 * b2[k+6]; + c2 += a8 * b2[k+7]; + + c3 += a1 * b3[k+0]; + c3 += a2 * b3[k+1]; + c3 += a3 * b3[k+2]; + c3 += a4 * b3[k+3]; + c3 += a5 * b3[k+4]; + c3 += a6 * b3[k+5]; + c3 += a7 * b3[k+6]; + c3 += a8 * b3[k+7]; + + c4 += a1 * b4[k+0]; + c4 += a2 * b4[k+1]; + c4 += a3 * b4[k+2]; + c4 += a4 * b4[k+3]; + c4 += a5 * b4[k+4]; + c4 += a6 * b4[k+5]; + c4 += a7 * b4[k+6]; + c4 += a8 * b4[k+7]; + + + } + C[i+0 + j*lda] = c1; + C[i+1 + j*lda] = c2; + C[i+2 + j*lda] = c3; + C[i+3 + j*lda] = c4; + barrier(ncores); + } + } + +} diff --git a/mt/ae_matmul/ae_matmul.c b/mt/ae_matmul/ae_matmul.c deleted file mode 100755 index 7a2e79d..0000000 --- a/mt/ae_matmul/ae_matmul.c +++ /dev/null @@ -1,263 +0,0 @@ -//************************************************************************** -// 
Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - 
return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - - - - data_t *b1; - data_t *b2; - data_t *b3; - data_t *b4; - data_t c1; - data_t c2; - data_t c3; - data_t c4; - data_t a1; - data_t a2; - data_t a3; - data_t a4; - data_t a5; - data_t a6; - data_t a7; - data_t a8; - int i, j, k; - static data_t BB[1024]; - - - - //transpose B - if (coreid == 0 | coreid == 1) { - for ( k = 0; k < lda; k++) { - for ( i = coreid*(lda/2); i < (coreid+1)*(lda/2); i++ ) { - BB[i*lda + k] = B[k*lda + i]; - } - } - } - barrier(ncores); - - for ( i = 0; i < lda; i+=4 ) { - for ( j = coreid*(lda/ncores); j < (coreid+1)*(lda/ncores); j++ ) { - c1 = 0; c2 = 0; c3 = 0; c4 = 0; - b1 = &BB[(i+0)*lda]; - b2 = &BB[(i+1)*lda]; - b3 = &BB[(i+2)*lda]; - b4 = &BB[(i+3)*lda]; - for ( k = 0; k < lda; k+=8 ) { - - a1 = A[j*lda + k+0]; - a2 = A[j*lda + k+1]; - a3 = A[j*lda + k+2]; - a4 = A[j*lda + k+3]; - a5 = A[j*lda + k+4]; - a6 = A[j*lda + k+5]; - a7 = A[j*lda + k+6]; - a8 = A[j*lda + k+7]; - - c1 += a1 * b1[k+0]; - c1 += a2 * b1[k+1]; - c1 += a3 * b1[k+2]; - c1 += a4 * b1[k+3]; - c1 += a5 * b1[k+4]; - c1 += a6 * b1[k+5]; - c1 += a7 * b1[k+6]; - c1 += a8 * b1[k+7]; - - c2 += a1 * b2[k+0]; - c2 += a2 * b2[k+1]; - c2 += a3 * b2[k+2]; - c2 += a4 * b2[k+3]; - c2 += a5 * b2[k+4]; - c2 += a6 * b2[k+5]; - c2 
+= a7 * b2[k+6]; - c2 += a8 * b2[k+7]; - - c3 += a1 * b3[k+0]; - c3 += a2 * b3[k+1]; - c3 += a3 * b3[k+2]; - c3 += a4 * b3[k+3]; - c3 += a5 * b3[k+4]; - c3 += a6 * b3[k+5]; - c3 += a7 * b3[k+6]; - c3 += a8 * b3[k+7]; - - c4 += a1 * b4[k+0]; - c4 += a2 * b4[k+1]; - c4 += a3 * b4[k+2]; - c4 += a4 * b4[k+3]; - c4 += a5 * b4[k+4]; - c4 += a6 * b4[k+5]; - c4 += a7 * b4[k+6]; - c4 += a8 * b4[k+7]; - - - } - C[i+0 + j*lda] = c1; - C[i+1 + j*lda] = c2; - C[i+2 + j*lda] = c3; - C[i+3 + j*lda] = c4; - } - } - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - -/* - // Execute the provided, naive matmul - barrier(nc); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - - // clear results from the first trial - size_t i; - if (coreid == 0) - for (i=0; i < ARRAY_SIZE; i++) - results_data[i] = 0; - barrier(nc); -*/ - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ae_matmul/dataset.h b/mt/ae_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/ae_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 
1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 
3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 
2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 
0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 
98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 
59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/ae_matmul/matmul_gendata.pl b/mt/ae_matmul/matmul_gendata.pl deleted file mode 100755 index f21bb46..0000000 --- a/mt/ae_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. 
-# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - 
-#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. ($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." 
\n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/ae_matmul/matmul_mi.c b/mt/ae_matmul/matmul_mi.c deleted file mode 100755 index cf464f4..0000000 --- a/mt/ae_matmul/matmul_mi.c +++ /dev/null @@ -1,311 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if 
(coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - data_t a1; - data_t a2; - data_t a3; - data_t a4; - data_t a5; - data_t a6; - data_t a7; - data_t a8; - data_t *b1; - data_t *b2; - data_t *b3; - data_t *b4; - data_t *b5; - data_t *b6; - data_t *b7; - data_t *b8; - data_t c1; - data_t c2; - data_t c3; - data_t c4; - data_t c5; - data_t c6; - data_t c7; - data_t c8; - int i, j, k; - int start, end; - static data_t BB[1024]; - - - //transpose B - if (coreid == 0 | coreid == 1 ) { - for ( k = 0; k < lda; k++) { - for ( i = coreid*(lda/2); i < (coreid+1)*(lda/2); i++ ) { - BB[i*lda + k] = B[k*lda + i]; - } - } - } - barrier(nc); - - for ( int x = 0; x < ncores; x++) { - //split the i values into two chunks so the threads don't interfere on the B loads - //this could be generalized if needed, but I won't bother since it would be tricky - //and we already know the size and numthreads - start = coreid == x ? 0 : 16; - end = coreid == x ? 
16 : 32; - for ( i = start; i < end; i+=8 ) { - for ( j = coreid*(lda/ncores); j < (coreid+1)*(lda/ncores); j++ ) { - c1=0;c2=0;c3=0;c4=0;c5=0;c6=0;c7=0;c8=0; - b1 = &BB[(i+0)*lda]; - b2 = &BB[(i+1)*lda]; - b3 = &BB[(i+2)*lda]; - b4 = &BB[(i+3)*lda]; - b5 = &BB[(i+4)*lda]; - b6 = &BB[(i+5)*lda]; - b7 = &BB[(i+6)*lda]; - b8 = &BB[(i+7)*lda]; - - for ( k = 0; k < lda; k+=8 ) { - a1 = A[j*lda + k+0]; - a2 = A[j*lda + k+1]; - a3 = A[j*lda + k+2]; - a4 = A[j*lda + k+3]; - a5 = A[j*lda + k+4]; - a6 = A[j*lda + k+5]; - a7 = A[j*lda + k+6]; - a8 = A[j*lda + k+7]; - - c1 += a1 * b1[k+0]; - c1 += a2 * b1[k+1]; - c1 += a3 * b1[k+2]; - c1 += a4 * b1[k+3]; - c1 += a5 * b1[k+4]; - c1 += a6 * b1[k+5]; - c1 += a7 * b1[k+6]; - c1 += a8 * b1[k+7]; - - c2 += a1 * b2[k+0]; - c2 += a2 * b2[k+1]; - c2 += a3 * b2[k+2]; - c2 += a4 * b2[k+3]; - c2 += a5 * b2[k+4]; - c2 += a6 * b2[k+5]; - c2 += a7 * b2[k+6]; - c2 += a8 * b2[k+7]; - - c3 += a1 * b3[k+0]; - c3 += a2 * b3[k+1]; - c3 += a3 * b3[k+2]; - c3 += a4 * b3[k+3]; - c3 += a5 * b3[k+4]; - c3 += a6 * b3[k+5]; - c3 += a7 * b3[k+6]; - c3 += a8 * b3[k+7]; - - c4 += a1 * b4[k+0]; - c4 += a2 * b4[k+1]; - c4 += a3 * b4[k+2]; - c4 += a4 * b4[k+3]; - c4 += a5 * b4[k+4]; - c4 += a6 * b4[k+5]; - c4 += a7 * b4[k+6]; - c4 += a8 * b4[k+7]; - - c5 += a1 * b5[k+0]; - c5 += a2 * b5[k+1]; - c5 += a3 * b5[k+2]; - c5 += a4 * b5[k+3]; - c5 += a5 * b5[k+4]; - c5 += a6 * b5[k+5]; - c5 += a7 * b5[k+6]; - c5 += a8 * b5[k+7]; - - c6 += a1 * b6[k+0]; - c6 += a2 * b6[k+1]; - c6 += a3 * b6[k+2]; - c6 += a4 * b6[k+3]; - c6 += a5 * b6[k+4]; - c6 += a6 * b6[k+5]; - c6 += a7 * b6[k+6]; - c6 += a8 * b6[k+7]; - - c7 += a1 * b7[k+0]; - c7 += a2 * b7[k+1]; - c7 += a3 * b7[k+2]; - c7 += a4 * b7[k+3]; - c7 += a5 * b7[k+4]; - c7 += a6 * b7[k+5]; - c7 += a7 * b7[k+6]; - c7 += a8 * b7[k+7]; - - c8 += a1 * b8[k+0]; - c8 += a2 * b8[k+1]; - c8 += a3 * b8[k+2]; - c8 += a4 * b8[k+3]; - c8 += a5 * b8[k+4]; - c8 += a6 * b8[k+5]; - c8 += a7 * b8[k+6]; - c8 += a8 * b8[k+7]; - } - C[i+0 + 
j*lda] += c1; - C[i+1 + j*lda] += c2; - C[i+2 + j*lda] += c3; - C[i+3 + j*lda] += c4; - C[i+4 + j*lda] += c5; - C[i+5 + j*lda] += c6; - C[i+6 + j*lda] += c7; - C[i+7 + j*lda] += c8; - } - } - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - -/* - // Execute the provided, naive matmul - barrier(nc); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - - // clear results from the first trial - size_t i; - if (coreid == 0) - for (i=0; i < ARRAY_SIZE; i++) - results_data[i] = 0; - barrier(nc); -*/ - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ae_vvadd/ae_vvadd.c b/mt/ae_vvadd/ae_vvadd.c deleted file mode 100755 index b1d336b..0000000 --- a/mt/ae_vvadd/ae_vvadd.c +++ /dev/null @@ -1,178 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. 
The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < 
n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - - size_t i; - - size_t sizepercore = n / ncores; - size_t start = coreid * sizepercore; - size_t end = (coreid + 1) * sizepercore; - for (i = start; i < end; i++) - { - x[i] = x[i] + y[i]; - } - - - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ae_vvadd/dataset.h b/mt/ae_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- 
a/mt/ae_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 
9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 
16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 
4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 
12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 
10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 
15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 
21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 
20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 
2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 
29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/ae_vvadd/vvadd_gendata.pl b/mt/ae_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/ae_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print 
"static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." 
\n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/af_matmul.c b/mt/af_matmul.c new file mode 100755 index 0000000..a147b62 --- /dev/null +++ b/mt/af_matmul.c @@ -0,0 +1,79 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + size_t i, j, k, l; + int row,row2, column, column2, column3, column4, column5, column6, column7, column8; + data_t element, element2, element3, element4, element5, element6, element7, element8; + data_t B1, B2, B3, B4; + data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + int local_lda = lda; + + for (l=coreid*local_lda/ncores; l -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < 
n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i,j,k,l; - data_t element1, element2, element3, element4, element5, element6, element7, element8; - int row, row2; - int column1, column2, column3, column4, column5, column6, column7, column8; - data_t temp[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - data_t temp2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - if (coreid == 0){ - for (i=0; i<32; i+=2){ - row = i*32; - row2 = (i+1)*32; - for (j=0; j<16; j+=4){ - element1 = A[row+j]; - element2 = A[row+j+1]; - element3 = A[row+j+2]; - element4 = A[row+j+3]; - column1 = j*32; - column2 = (j+1)*32; - column3 = (j+2)*32; - column4 = (j+3)*32; - element5 = A[row2+j]; - element6 = A[row2+j+1]; - element7 = A[row2+j+2]; - element8 = A[row2+j+3]; - - for (k=0; k<32; k+=4){ - temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k]; - temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1]; - 
temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2]; - temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3]; - temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k]; - temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1]; - temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2]; - temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3]; - } - if (j==12){ - for (l=0; l<32; l++){ - C[row+l]+=temp[l]; - C[row2+l]+=temp2[l]; - temp[l]=0; - temp2[l]=0; - } - } - } - } - } - else if (coreid==1){ - for (i=0; i<32; i+=2){ - row = (31-i)*32; - row2 = (31-i-1)*32; - for (j=16; j<32; j+=4){ - element1 = A[row+j]; - element2 = A[row+j+1]; - element3 = A[row+j+2]; - element4 = A[row+j+3]; - element5 = A[row2+j]; - element6 = A[row2+j+1]; - element7 = A[row2+j+2]; - element8 = A[row2+j+3]; - column1 = j*32; - column2 = (j+1)*32; - column3 = (j+2)*32; - column4 = (j+3)*32; - for (k=0; k<32; k+=4){ - temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k]; - temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1]; - temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2]; - temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3]; - temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k]; - temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1]; - temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2]; - 
temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3]; - } - if (j==28){ - for (l=0; l<32; l++){ - C[row+l]+=temp[l]; - C[row2+l]+=temp2[l]; - temp[l]=0; - temp2[l]=0; - } - } - } - } - } - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - -} -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - - // Execute the provided, naive matmul - barrier(nc); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - - // clear results from the first trial - size_t i; - if (coreid == 0) - for (i=0; i < ARRAY_SIZE; i++) - results_data[i] = 0; - barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} diff --git a/mt/af_matmul/Ronald.c~ b/mt/af_matmul/Ronald.c~ deleted file mode 100644 index 31ea15d..0000000 --- a/mt/af_matmul/Ronald.c~ +++ /dev/null @@ -1,246 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// 
-// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArray( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void 
__attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i,j,k,l; - data_t element1, element2, element3, element4, element5, element6, element7, element8; - int row, row2; - int column1, column2, column3, column4, column5, column6, column7, column8; - data_t temp[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - data_t temp2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - if (coreid == 0){ - for (i=0; i<32; i+=2){ - row = i*32; - row2 = (i+1)*32; - for (j=0; j<16; j+=4){ - element1 = A[row+j]; - element2 = A[row+j+1]; - element3 = A[row+j+2]; - element4 = A[row+j+3]; - column1 = j*32; - column2 = (j+1)*32; - column3 = (j+2)*32; - column4 = (j+3)*32; - element5 = A[row2+j]; - element6 = A[row2+j+1]; - element7 = A[row2+j+2]; - element8 = A[row2+j+3]; - - for (k=0; k<32; k+=4){ - temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k]; - temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1]; - temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2]; - temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3]; - temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k]; - temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1]; - temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2]; - 
temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3]; - } - if (j==12){ - for (l=0; l<32; l++){ - C[row+l]+=temp[l]; - C[row2+l]+=temp2[l]; - temp[l]=0; - temp2[l]=0; - } - } - } - } - } - else if (coreid==1){ - for (i=0; i<32; i+=2){ - row = (31-i)*32; - row2 = (31-i-1)*32; - for (j=16; j<32; j+=4){ - element1 = A[row+j]; - element2 = A[row+j+1]; - element3 = A[row+j+2]; - element4 = A[row+j+3]; - element5 = A[row2+j]; - element6 = A[row2+j+1]; - element7 = A[row2+j+2]; - element8 = A[row2+j+3]; - column1 = j*32; - column2 = (j+1)*32; - column3 = (j+2)*32; - column4 = (j+3)*32; - for (k=0; k<32; k+=4){ - temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k]; - temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1]; - temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2]; - temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3]; - temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k]; - temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1]; - temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2]; - temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3]; - } - if (j==28){ - for (l=0; l<32; l++){ - C[row+l]+=temp[l]; - C[row2+l]+=temp2[l]; - temp[l]=0; - temp2[l]=0; - } - } - } - } - } - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - -} -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). 
Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - - // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); - - - // verify - verify(ARRAY_SIZE, results_data, verify_data); - - // clear results from the first trial - size_t i; - if (coreid == 0) - for (i=0; i < ARRAY_SIZE; i++) - results_data[i] = 0; - barrier(); - - - // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); - -#ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); - - exit(0); -} diff --git a/mt/af_matmul/af_matmul.c b/mt/af_matmul/af_matmul.c deleted file mode 100755 index 4de06dd..0000000 --- a/mt/af_matmul/af_matmul.c +++ /dev/null @@ -1,237 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -data_t mult(data_t x, data_t y) -{ data_t result = 0; - size_t i; - for (i=0; i < x; i++) { - result += y; - } - return result; -} -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for 
( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - size_t i, j, k, l; - int row,row2, column, column2, column3, column4, column5, column6, column7, column8; - data_t element, element2, element3, element4, element5, element6, element7, element8; - data_t B1, B2, B3, B4; - data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - int local_lda = lda; - - for (l=coreid*local_lda/ncores; l -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) 
- { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - size_t i, j, k, l; - int row,row2, column, column2, column3, column4, column5, column6, column7, column8; - data_t element, element2, element3, element4, element5, element6, element7, element8; - data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){ - for (l=coreid*32/ncores; l<32*(1+coreid)/ncores; l+=2){ - row=l*lda; - row2=(l+1)*lda; - for (i=0; i -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, 
_c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArray( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - size_t i, j, k, l; - int row,row2, column, column2, column3, column4, column5, column6, column7, column8; - data_t element, element2, element3, element4, element5, element6, element7, element8; - data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){ - for (l=coreid*32/ncores; l<32*(1+coreid)/ncores; l+=4){ - row=l*lda; - row2=(l+1)*lda; - for (i=0; i -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference 
Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -data_t mult(data_t x, data_t y) -{ data_t result = 0; - size_t i; - for (i=0; i < x; i++) { - result += y; - } - return result; -} -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t 
B[], data_t C[] ) -{ - - size_t i, j, k, l; - int row,row2, column, column2, column3, column4, column5, column6, column7, column8; - data_t element, element2, element3, element4, element5, element6, element7, element8; - data_t B1, B2, B3, B4; - data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - int local_lda = lda; - //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){ - for (l=coreid*local_lda/ncores; l -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - 
return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - size_t i; - size_t i2; - size_t j; - size_t j2; - size_t k; - size_t k2; - size_t max_dim = lda*lda; - size_t block_size = lda/2; - data_t temp_mat[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - if (coreid == 0) { - //making a 16x16 block - //First block: Top 16x16 block left of A and top left of B = top left of C - //Second block: top right 16x16 right block of A and top right of B = top right of C - for (j2= 0; j2 < 2; j2++) { - for (i2 = 0; i2 < 2; i2++) { - //for (j2= 0; j2 < 2; j2++) { - //K represents which row of A and C - for (k = 0; k < block_size; k++) { - int rowIndex = k*32; - for (i = i2*block_size; i < i2*block_size+block_size; i++) { - int elementA = A[rowIndex+i]; - int columnIndex = i%32*32; - for (j = 0; j < block_size; j++) { - temp_mat[j] += elementA*B[columnIndex+j+j2*block_size]; - } - } - //Put temp_mat into actual result Matrix - for (k2 = 0; k2 < block_size; k2++) { - C[rowIndex+k2+j2*block_size] += temp_mat[k2]; - temp_mat[k2] = 0; - } - } - } - } - } else { - for (j2= 0; j2 < 2; j2++) { - for (i2 = 0; i2 < 2; i2++) { - //for (j2= 0; j2 < 2; j2++) { - //K represents which row of A and C - for (k = block_size; k < lda; k++) { - int rowIndex = k*32; - for (i = i2*block_size; i < i2*block_size+block_size; i++) { - int elementA = A[rowIndex+i]; - int columnIndex = i%32*32; - for (j = 0; j < block_size; j++) { - temp_mat[j] += elementA*B[columnIndex+j+j2*block_size]; - } - } - //Put 
temp_mat into actual result Matrix - for (k2 = 0; k2 < block_size; k2++) { - C[rowIndex+k2+j2*block_size] += temp_mat[k2]; - temp_mat[k2] = 0; - } - } - } - } - } - - - //size_t half_lda = lda/2; - // k = which pair of row we're on - - - - - - -/* - for (k = coreid*lda/ncores; k < (lda/ncores + coreid*lda/ncores); k += 2) { - //printf("%d", k); - for (i = 0; i < lda ; i++) { - int elementA = A[32*k+i]; - int elementA2 = A[i + 32*(k+1)]; - int column = i%32*32; - for (j = 0; j < lda; j++) { - C[32*k + j] += elementA*B[column+j]; - C[32*(k+1) + j] += elementA2*B[column+j]; - } - } - - } -*/ - -/* - data_t element=A[i]; - data_t element2 = A[i+1]; - data_t element3 = A[i+2]; - data_t element4 = A[i+3]; - data_t element5 = A[i+4]; - data_t element6 = A[i+5]; - data_t element7 = A[i+6]; - data_t element8 = A[i+7]; - int row= (int)(i/32)*32; - int row2 = (i+1)/32*32; - int row3 = (i+2)/32*32; - int row4 = (i+3)/32*32; - int row5 = (i+4)/32*32; - int row6 = (i+5)/32*32; - int row7 = (i+6)/32*32; - int row8 = (i+7)/32*32; - int column = i%32*32; - int column2 = (i+1)%32*32; - int column3 = (i+2)%32*32; - int column4 = (i+3)%32*32; - int column5 = (i+4)%32*32; - int column6 = (i+5)%32*32; - int column7 = (i+6)%32*32; - - */ - - //int column8 = (i+7)%32*32; - - /* - for (j=0; j < lda; j++) { - sum = B[ - C[row+j]+=element*B[column+j]; - C[row2+j]+=element2*B[column2+j]; - C[row3+j]+=element3*B[column3+j]; - C[row4+j]+=element4*B[column4+j]; - C[row5+j]+=element5*B[column5+j]; - C[row6+j]+=element6*B[column6+j]; - C[row7+j]+=element7*B[column7+j]; - C[row8+j]+=element8*B[column8+j]; - C[row+j]+=element*B[column+j]+element2*B[column2+j]+element3*B[column3+j]+element4*B[column4+j]+element5*B[column5+j]+element6*B[column6+j]+element7*B[column7+j]+element8*B[column8+j]; - } - } - */ - - - - - - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. 
- -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - - // Execute the provided, naive matmul - barrier(nc); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - - // clear results from the first trial - size_t i; - if (coreid == 0) - for (i=0; i < ARRAY_SIZE; i++) - results_data[i] = 0; - barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} diff --git a/mt/af_matmul/failedattempt2.c b/mt/af_matmul/failedattempt2.c deleted file mode 100644 index 657c23d..0000000 --- a/mt/af_matmul/failedattempt2.c +++ /dev/null @@ -1,229 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda 
+ i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - size_t i; - size_t i2; - size_t j; - size_t j2; - size_t k; - size_t k2; - size_t max_dim = lda*lda; - size_t block_size = lda/2; - int result = 0; - data_t temp_mat1[32] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - if (coreid == 0) { - for (k = 0; k < lda/2; k++) { - int columnIndex = 32*k; - - //temp_mat1 will store the kth column of B - for (i = 0; i < lda; i++) { - temp_mat1[i] = B[32*i + k]; - } - - for (j =0; j < lda; j++) { - int rowIndex = 32*j; - //iterate through each element of A in row J and accumulate result - for (i2 = 0; i2 -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] 
!= correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - size_t i, j, k, l; - int row, row2, row3, row4, column, column2; - data_t element, element2, element3, element4, element5, element6, element7, element8; - data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - data_t temp_mat3[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - data_t temp_mat4[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){ - for (l=coreid*32/ncores; l<32*(1+coreid)/ncores; l+=4){ - row=l*lda; - row2=(l+1)*lda; - row3=(l+2)*lda; - row4=(l+3)*lda; - for (i=0; i -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += 
rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - size_t i, j, k, l; - int row,row2, column, column2, column3, column4, column5, column6, column7, column8; - data_t element, element2, element3, element4, element5, element6, element7, element8; - data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){ - for (l=coreid*32/ncores; l<32*(1+coreid)/ncores; l+=2){ - row=l*lda; - 
row2=(l+1)*lda; - if (coreid == 0) { - for (i=0; i -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArray( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - -void __attribute__((noinline)) matmul(const int 
lda, const data_t A[], const data_t B[], data_t C[] ) -{ - size_t i, j, k, l; - int row,row2, column, column2, column3, column4, column5, column6, column7, column8; - data_t element, element2, element3, element4, element5, element6, element7, element8; - data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){ - for (l=coreid*32/ncores; l<32*(1+coreid)/ncores; l+=2){ - row=l*lda; - row2=(l+1)*lda; - for (i=0; i -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, 
(long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -data_t mult(data_t x, data_t y) -{ data_t result = 0; - size_t i; - for (i=0; i < x; i++) { - result += y; - } - return result; -} -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - size_t i, j, k, l; - int row,row2, row3, row4, column, column2, column3, column4, column5, column6, column7, column8; - data_t element, element2, element3, element4, element5, element6, element7, element8; - data_t element9, element10, element11, element12, element13, element14, element15, element16; - data_t elementB1,elementB2,elementB3,elementB4; - data_t temp_mat[128]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - //data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){ - for (l=coreid*32/ncores; l<32*(1+coreid)/ncores; l+=4){ - row=l*lda; - row2=(l+1)*lda; - row3=(l+2)*lda; - row4=(l+3)*lda; - for (i=0; i -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and 
Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArray( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -data_t mult(data_t x, data_t y) -{ data_t result = 0; - size_t i; - for (i=0; i < x; i++) { - result += y; - } - return result; -} -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - size_t i, j, k, l; - int row,row2, column, column2, column3, column4, column5, column6, column7, column8; - data_t element, element2, 
element3, element4, element5, element6, element7, element8; - data_t B1, B2, B3, B4; - data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - int local_lda = lda; - //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){ - for (l=coreid*local_lda/ncores; l[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/af_matmul/matmul_mi.c b/mt/af_matmul/matmul_mi.c deleted file mode 100644 index 3190c8e..0000000 --- a/mt/af_matmul/matmul_mi.c +++ /dev/null @@ -1,250 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: Felix Li $ Ronald Lee -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " 
%10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i,j,k,l; - data_t element1, element2, element3, element4, element5, element6, element7, element8; - int row, row2; - int column1, column2, column3, column4, column5, column6, column7, column8; - data_t temp[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - data_t temp2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - if (coreid == 0){ - for (i=0; i<32; i+=2){ - row = i*32; - row2 = (i+1)*32; - for (j=0; j<16; j+=4){ - element1 = A[row+j]; - element2 = A[row+j+1]; - element3 = A[row+j+2]; - element4 = A[row+j+3]; - column1 = j*32; - column2 = (j+1)*32; - column3 = (j+2)*32; - column4 = (j+3)*32; - element5 = A[row2+j]; - element6 = A[row2+j+1]; - element7 = A[row2+j+2]; - element8 = A[row2+j+3]; - - for (k=0; k<32; k+=4){ - temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k]; - temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1]; - 
temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2]; - temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3]; - temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k]; - temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1]; - temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2]; - temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3]; - } - - - } - for (l=0; l<32; l++){ - C[row+l]+=temp[l]; - C[row2+l]+=temp2[l]; - temp[l]=0; - temp2[l]=0; - } - - } - } - else { - for (i=0; i<32; i+=2){ - row = (31-i)*32; - row2 = (31-i-1)*32; - for (j=16; j<32; j+=4){ - element1 = A[row+j]; - element2 = A[row+j+1]; - element3 = A[row+j+2]; - element4 = A[row+j+3]; - element5 = A[row2+j]; - element6 = A[row2+j+1]; - element7 = A[row2+j+2]; - element8 = A[row2+j+3]; - column1 = j*32; - column2 = (j+1)*32; - column3 = (j+2)*32; - column4 = (j+3)*32; - for (k=0; k<32; k+=4){ - temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k]; - temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1]; - temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2]; - temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3]; - temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k]; - temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1]; - temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2]; - 
temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3]; - } - - - - } - for (l=0; l<32; l++){ - C[row+l]+=temp[l]; - C[row2+l]+=temp2[l]; - temp[l]=0; - temp2[l]=0; - } - } - } - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} diff --git a/mt/af_matmul/matmul_mi.c~ b/mt/af_matmul/matmul_mi.c~ deleted file mode 100644 index 4ac4de7..0000000 --- a/mt/af_matmul/matmul_mi.c~ +++ /dev/null @@ -1,248 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher 
Celio -// Student: Felix Li $ Ronald Lee -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArray( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// 
single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i,j,k,l; - data_t element1, element2, element3, element4, element5, element6, element7, element8; - int row, row2; - int column1, column2, column3, column4, column5, column6, column7, column8; - data_t temp[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - data_t temp2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - if (coreid == 0){ - for (i=0; i<32; i+=2){ - row = i*32; - row2 = (i+1)*32; - for (j=0; j<16; j+=4){ - element1 = A[row+j]; - element2 = A[row+j+1]; - element3 = A[row+j+2]; - element4 = A[row+j+3]; - column1 = j*32; - column2 = (j+1)*32; - column3 = (j+2)*32; - column4 = (j+3)*32; - element5 = A[row2+j]; - element6 = A[row2+j+1]; - element7 = A[row2+j+2]; - element8 = A[row2+j+3]; - - for (k=0; k<32; k+=4){ - temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k]; - temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1]; - temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2]; - temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3]; - temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k]; - temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1]; - temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2]; - 
temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3]; - } - if (j==12){ - for (l=0; l<32; l++){ - C[row+l]+=temp[l]; - C[row2+l]+=temp2[l]; - temp[l]=0; - temp2[l]=0; - } - } - } - } - } - else if (coreid==1){ - for (i=0; i<32; i+=2){ - row = (31-i)*32; - row2 = (31-i-1)*32; - for (j=16; j<32; j+=4){ - element1 = A[row+j]; - element2 = A[row+j+1]; - element3 = A[row+j+2]; - element4 = A[row+j+3]; - element5 = A[row2+j]; - element6 = A[row2+j+1]; - element7 = A[row2+j+2]; - element8 = A[row2+j+3]; - column1 = j*32; - column2 = (j+1)*32; - column3 = (j+2)*32; - column4 = (j+3)*32; - for (k=0; k<32; k+=4){ - temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k]; - temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1]; - temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2]; - temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3]; - temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k]; - temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1]; - temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2]; - temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3]; - } - if (j==28){ - for (l=0; l<32; l++){ - C[row+l]+=temp[l]; - C[row2+l]+=temp2[l]; - temp[l]=0; - temp2[l]=0; - } - } - } - } - } - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). 
Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - - // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); - - - // verify - verify(ARRAY_SIZE, results_data, verify_data); - - // clear results from the first trial - size_t i; - if (coreid == 0) - for (i=0; i < ARRAY_SIZE; i++) - results_data[i] = 0; - barrier(); - - - // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); - -#ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); - - exit(0); -} diff --git a/mt/af_vvadd/af_vvadd.c b/mt/af_vvadd/af_vvadd.c deleted file mode 100755 index 5c6b1e9..0000000 --- a/mt/af_vvadd/af_vvadd.c +++ /dev/null @@ -1,178 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // 
***************************** // - // **** ADD YOUR CODE HERE ***** // - - size_t i; - - if (coreid == 0) - { - for (i = 0; i < n/2; i++) { - x[i] = x[i] + y[i]; - } - } else { - for (i = n/2; i < n; i++) { - x[i] = x[i] + y[i]; - } - } - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/af_vvadd/dataset.h b/mt/af_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/af_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 
2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 
13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 
5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 
15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 
8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 
3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 
5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 
14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 
32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 
36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 
26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/af_vvadd/vvadd_gendata.pl b/mt/af_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/af_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } 
- } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." 
\n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/ag_matmul.c b/mt/ag_matmul.c new file mode 100755 index 0000000..0b9cc6e --- /dev/null +++ b/mt/ag_matmul.c @@ -0,0 +1,79 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +#include "util.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + int i, j, k; + + for ( i = 0; i < lda; i+=2 ) + { + for (k = 0; k < lda; k+=4) + { + int d0 = B[k*lda + i]; + int c0 = B[k*lda + i + 1]; + int d1 = B[(k+1)*lda + i]; + int c1 = B[(k+1)*lda + i + 1]; + int d2 = B[(k+2)*lda + i]; + int c2 = B[(k+2)*lda + i + 1]; + int d3 = B[(k+3)*lda + i]; + int c3 = B[(k+3)*lda + i + 1]; + + for ( j = coreid*(lda/ncores); j < (coreid+1)*(lda/ncores); j+=4) + { + + int sum = A[j*lda + k] * d0; + sum += A[j*lda + k + 1] * d1; + sum += A[j*lda + k + 2] * d2; + sum += A[j*lda + k + 3] * d3; + C[j*lda +i] += sum; + + sum = A[j*lda + k] * c0; + sum += A[j*lda + k + 1] * c1; + sum += A[j*lda + k + 2] * c2; + sum += A[j*lda + k + 3] * c3; + C[j*lda + i + 1] += sum; + + sum = A[(j+1)*lda + k] * d0; + sum += A[(j+1)*lda + k + 1] * d1; + sum += A[(j+1)*lda + k + 2] * d2; + sum += A[(j+1)*lda + k + 3] * d3; + C[(j+1)*lda +i] += sum; + + sum = A[(j+1)*lda + k] * c0; + sum += A[(j+1)*lda + k + 1] * c1; + sum += A[(j+1)*lda + k + 2] * c2; + sum += A[(j+1)*lda + k + 3] * c3; + C[(j+1)*lda + i + 1] += sum; + + sum = A[(j+2)*lda + k] * d0; + sum += A[(j+2)*lda + k + 1] * d1; + sum += A[(j+2)*lda + k + 2] * d2; + sum += A[(j+2)*lda + k + 3] * d3; + C[(j+2)*lda +i] += sum; + + sum = A[(j+2)*lda + k] * c0; + sum += A[(j+2)*lda + k + 1] * c1; + sum += A[(j+2)*lda + k + 2] * c2; + sum += A[(j+2)*lda + k + 3] * c3; + C[(j+2)*lda + i + 1] += sum; + + sum = A[(j+3)*lda + k] * d0; + sum += A[(j+3)*lda + k + 1] * d1; + sum += A[(j+3)*lda + k + 2] * d2; 
+ sum += A[(j+3)*lda + k + 3] * d3; + C[(j+3)*lda +i] += sum; + + sum = A[(j+3)*lda + k] * c0; + sum += A[(j+3)*lda + k + 1] * c1; + sum += A[(j+3)*lda + k + 2] * c2; + sum += A[(j+3)*lda + k + 3] * c3; + C[(j+3)*lda + i + 1] += sum; + + } + barrier(ncores); + } + } +} diff --git a/mt/ag_matmul/ag_matmul.c b/mt/ag_matmul/ag_matmul.c deleted file mode 100755 index bd470eb..0000000 --- a/mt/ag_matmul/ag_matmul.c +++ /dev/null @@ -1,230 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ -unsigned long _c = -rdcycle(), _i = -rdinstret(); \ -code; \ -_c += rdcycle(), _i += rdinstret(); \ -if (coreid == 0) \ -printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ -stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ -} while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( 
char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - for ( i = 0; i < lda; i+=2 ) - { - for (k = 0; k < lda; k+=4) - { - int d0 = B[k*lda + i]; - int c0 = B[k*lda + i + 1]; - int d1 = B[(k+1)*lda + i]; - int c1 = B[(k+1)*lda + i + 1]; - int d2 = B[(k+2)*lda + i]; - int c2 = B[(k+2)*lda + i + 1]; - int d3 = B[(k+3)*lda + i]; - int c3 = B[(k+3)*lda + i + 1]; - - for ( j = coreid*(lda/ncores); j < (coreid+1)*(lda/ncores); j+=4) - { - - int sum = A[j*lda + k] * d0; - sum += A[j*lda + k + 1] * d1; - sum += A[j*lda + k + 2] * d2; - sum += A[j*lda + k + 3] * d3; - C[j*lda +i] += sum; - - sum = A[j*lda + k] * c0; - sum += A[j*lda + k + 1] * c1; - sum += A[j*lda + k + 2] * c2; - sum += A[j*lda + k + 3] * c3; - C[j*lda + i + 1] += sum; - - sum = A[(j+1)*lda + k] * d0; - sum += A[(j+1)*lda + k + 1] * d1; - sum += A[(j+1)*lda + k + 2] * d2; - sum += A[(j+1)*lda + k + 3] * d3; - C[(j+1)*lda +i] += sum; - - sum = A[(j+1)*lda + k] * c0; - 
sum += A[(j+1)*lda + k + 1] * c1; - sum += A[(j+1)*lda + k + 2] * c2; - sum += A[(j+1)*lda + k + 3] * c3; - C[(j+1)*lda + i + 1] += sum; - - sum = A[(j+2)*lda + k] * d0; - sum += A[(j+2)*lda + k + 1] * d1; - sum += A[(j+2)*lda + k + 2] * d2; - sum += A[(j+2)*lda + k + 3] * d3; - C[(j+2)*lda +i] += sum; - - sum = A[(j+2)*lda + k] * c0; - sum += A[(j+2)*lda + k + 1] * c1; - sum += A[(j+2)*lda + k + 2] * c2; - sum += A[(j+2)*lda + k + 3] * c3; - C[(j+2)*lda + i + 1] += sum; - - sum = A[(j+3)*lda + k] * d0; - sum += A[(j+3)*lda + k + 1] * d1; - sum += A[(j+3)*lda + k + 2] * d2; - sum += A[(j+3)*lda + k + 3] * d3; - C[(j+3)*lda +i] += sum; - - sum = A[(j+3)*lda + k] * c0; - sum += A[(j+3)*lda + k + 1] * c1; - sum += A[(j+3)*lda + k + 2] * c2; - sum += A[(j+3)*lda + k + 3] * c3; - C[(j+3)*lda + i + 1] += sum; - - } - barrier(ncores); - } - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ag_matmul/dataset.h b/mt/ag_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/ag_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 
3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 
0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 
0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 
68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 
61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/ag_matmul/matmul_gendata.pl b/mt/ag_matmul/matmul_gendata.pl deleted file mode 100755 index 
f21bb46..0000000 --- a/mt/ag_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print 
sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/ag_matmul/matmul_mi.c b/mt/ag_matmul/matmul_mi.c deleted file mode 100755 index 3352c56..0000000 --- a/mt/ag_matmul/matmul_mi.c +++ /dev/null @@ -1,230 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ -unsigned long _c = -rdcycle(), _i = -rdinstret(); \ -code; \ -_c += rdcycle(), _i += rdinstret(); \ -if (coreid == 0) \ -printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ -stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ -} while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 
0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - for ( i = 0; i < lda; i+=2 ) - { - for (k = 0; k < lda; k+=4) - { - int d0 = B[k*lda + i]; - int c0 = B[k*lda + i + 1]; - int d1 = B[(k+1)*lda + i]; - int c1 = B[(k+1)*lda + i + 1]; - int d2 = B[(k+2)*lda + i]; - int c2 = B[(k+2)*lda + i + 1]; - int d3 = B[(k+3)*lda + i]; - int c3 = B[(k+3)*lda + i + 1]; - - for ( j = coreid*(lda/ncores); j < (coreid+1)*(lda/ncores); j+=4) - { - - int sum = A[j*lda + k] * d0; - sum += A[j*lda + k + 1] * d1; - sum += A[j*lda + k + 2] * d2; - sum += A[j*lda + k + 3] * d3; - C[j*lda +i] += sum; - - sum = A[j*lda + k] * c0; - sum += A[j*lda + k + 1] * c1; - sum += A[j*lda + k + 2] * c2; - sum += A[j*lda + k + 3] * c3; - C[j*lda + i + 1] += sum; - - sum = A[(j+1)*lda + k] * d0; - sum += A[(j+1)*lda + k + 1] * d1; - sum += A[(j+1)*lda + k + 2] * d2; - sum += A[(j+1)*lda + k + 3] * d3; - C[(j+1)*lda +i] += sum; - - sum = A[(j+1)*lda + k] * c0; - sum += A[(j+1)*lda + k + 1] * c1; - sum += A[(j+1)*lda + k + 2] * c2; - sum += A[(j+1)*lda + k + 3] * c3; - C[(j+1)*lda 
+ i + 1] += sum; - - sum = A[(j+2)*lda + k] * d0; - sum += A[(j+2)*lda + k + 1] * d1; - sum += A[(j+2)*lda + k + 2] * d2; - sum += A[(j+2)*lda + k + 3] * d3; - C[(j+2)*lda +i] += sum; - - sum = A[(j+2)*lda + k] * c0; - sum += A[(j+2)*lda + k + 1] * c1; - sum += A[(j+2)*lda + k + 2] * c2; - sum += A[(j+2)*lda + k + 3] * c3; - C[(j+2)*lda + i + 1] += sum; - - sum = A[(j+3)*lda + k] * d0; - sum += A[(j+3)*lda + k + 1] * d1; - sum += A[(j+3)*lda + k + 2] * d2; - sum += A[(j+3)*lda + k + 3] * d3; - C[(j+3)*lda +i] += sum; - - sum = A[(j+3)*lda + k] * c0; - sum += A[(j+3)*lda + k + 1] * c1; - sum += A[(j+3)*lda + k + 2] * c2; - sum += A[(j+3)*lda + k + 3] * c3; - C[(j+3)*lda + i + 1] += sum; - - } - barrier(nc); - } - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ag_vvadd/ag_vvadd.c b/mt/ag_vvadd/ag_vvadd.c deleted file mode 100755 index 
51aa384..0000000 --- a/mt/ag_vvadd/ag_vvadd.c +++ /dev/null @@ -1,171 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - 
printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - - size_t i; - - for (i = coreid*(n/2); i < (coreid+1)*(n/2); i++){ - x[i] = x[i] + y[i]; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ag_vvadd/dataset.h b/mt/ag_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/ag_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 
1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 
17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 
11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static 
data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 
12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 
2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 
13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 
9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 
15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 
20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/ag_vvadd/vvadd_gendata.pl b/mt/ag_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/ag_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# 
Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - 
print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/ai_matmul.c b/mt/ai_matmul.c new file mode 100755 index 0000000..9d808f3 --- /dev/null +++ b/mt/ai_matmul.c @@ -0,0 +1,72 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +#include "util.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + // ***************************** // + // **** ADD YOUR CODE HERE ***** // + // ***************************** // + // + // feel free to make a separate function for MI and MSI versions. 
+ +//----------MSI-------------- +///* + int i,j,k; + barrier(ncores); + for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) { + for(i = 0; i < lda; i+=4) { + data_t Cval0 = 0; + data_t Cval1 = 0; + data_t Cval2 = 0; + data_t Cval3 = 0; + for(k = 0; k < lda; k++) { + Cval0 += A[j*lda+k]*B[k*lda+i]; + Cval1 += A[j*lda+k]*B[k*lda+i+1]; + Cval2 += A[j*lda+k]*B[k*lda+i+2]; + Cval3 += A[j*lda+k]*B[k*lda+i+3]; + } + C[j*lda+i] = Cval0; + C[j*lda+i+1] = Cval1; + C[j*lda+i+2] = Cval2; + C[j*lda+i+3] = Cval3; + } + } +//*/ + +//------------------MI------------------- +/* + int i,j,k; + barrier(nc); + for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) { + for(i = 0; i < lda; i+=4) { + data_t Cval0 = 0; + data_t Cval1 = 0; + data_t Cval2 = 0; + data_t Cval3 = 0; + if(coreid == 0) { + for(k = 0; k < lda; k++) { + Cval0 += A[j*lda+k]*B[k*lda+i]; + Cval1 += A[j*lda+k]*B[k*lda+i+1]; + Cval2 += A[j*lda+k]*B[k*lda+i+2]; + Cval3 += A[j*lda+k]*B[k*lda+i+3]; + } + } else { + for(k = lda-1; k >= 0; k--) { + Cval0 += A[j*lda+k]*B[k*lda+i]; + Cval1 += A[j*lda+k]*B[k*lda+i+1]; + Cval2 += A[j*lda+k]*B[k*lda+i+2]; + Cval3 += A[j*lda+k]*B[k*lda+i+3]; + } + } + C[j*lda+i] = Cval0; + C[j*lda+i+1] = Cval1; + C[j*lda+i+2] = Cval2; + C[j*lda+i+3] = Cval3; + } + } +*/ +} diff --git a/mt/ai_matmul/ai_matmul.c b/mt/ai_matmul/ai_matmul.c deleted file mode 100755 index f9640f4..0000000 --- a/mt/ai_matmul/ai_matmul.c +++ /dev/null @@ -1,222 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda 
+ i]; - } - } - -} - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - -//----------MSI-------------- -///* - int i,j,k; - barrier(ncores); - for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) { - for(i = 0; i < lda; i+=4) { - data_t Cval0 = 0; - data_t Cval1 = 0; - data_t Cval2 = 0; - data_t Cval3 = 0; - for(k = 0; k < lda; k++) { - Cval0 += A[j*lda+k]*B[k*lda+i]; - Cval1 += A[j*lda+k]*B[k*lda+i+1]; - Cval2 += A[j*lda+k]*B[k*lda+i+2]; - Cval3 += A[j*lda+k]*B[k*lda+i+3]; - } - C[j*lda+i] = Cval0; - C[j*lda+i+1] = Cval1; - C[j*lda+i+2] = Cval2; - C[j*lda+i+3] = Cval3; - } - } -//*/ - -//------------------MI------------------- -/* - int i,j,k; - barrier(nc); - for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) { - for(i = 0; i < lda; i+=4) { - data_t Cval0 = 0; - data_t Cval1 = 0; - data_t Cval2 = 0; - data_t Cval3 = 0; - if(coreid == 0) { - for(k = 0; k < lda; k++) { - Cval0 += A[j*lda+k]*B[k*lda+i]; - Cval1 += A[j*lda+k]*B[k*lda+i+1]; - Cval2 += A[j*lda+k]*B[k*lda+i+2]; - Cval3 += A[j*lda+k]*B[k*lda+i+3]; - } - } else { - for(k = lda-1; k >= 0; k--) { - Cval0 += A[j*lda+k]*B[k*lda+i]; - Cval1 += A[j*lda+k]*B[k*lda+i+1]; - Cval2 += A[j*lda+k]*B[k*lda+i+2]; - Cval3 += A[j*lda+k]*B[k*lda+i+3]; - } - } - C[j*lda+i] = Cval0; - C[j*lda+i+1] = Cval1; - C[j*lda+i+2] = Cval2; - C[j*lda+i+3] = Cval3; - } - } -*/ -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ai_matmul/dataset.h b/mt/ai_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/ai_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 
3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 
0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 
0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 
68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 
61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/ai_matmul/matmul_gendata.pl b/mt/ai_matmul/matmul_gendata.pl deleted file mode 100755 index 
f21bb46..0000000 --- a/mt/ai_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print 
sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/ai_matmul/matmul_mi.c b/mt/ai_matmul/matmul_mi.c deleted file mode 100755 index 154569c..0000000 --- a/mt/ai_matmul/matmul_mi.c +++ /dev/null @@ -1,221 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for 
( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. 
- -//----------MSI-------------- -/* - int i,j,k; - barrier(nc); - for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) { - for(i = 0; i < lda; i+=4) { - data_t Cval0 = 0; - data_t Cval1 = 0; - data_t Cval2 = 0; - data_t Cval3 = 0; - for(k = 0; k < lda; k++) { - Cval0 += A[j*lda+k]*B[k*lda+i]; - Cval1 += A[j*lda+k]*B[k*lda+i+1]; - Cval2 += A[j*lda+k]*B[k*lda+i+2]; - Cval3 += A[j*lda+k]*B[k*lda+i+3]; - } - C[j*lda+i] = Cval0; - C[j*lda+i+1] = Cval1; - C[j*lda+i+2] = Cval2; - C[j*lda+i+3] = Cval3; - } - } -*/ - -//------------------MI------------------- - - int i,j,k; - barrier(nc); - for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) { - for(i = 0; i < lda; i+=4) { - data_t Cval0 = 0; - data_t Cval1 = 0; - data_t Cval2 = 0; - data_t Cval3 = 0; - if(coreid == 0) { - for(k = 0; k < lda; k++) { - Cval0 += A[j*lda+k]*B[k*lda+i]; - Cval1 += A[j*lda+k]*B[k*lda+i+1]; - Cval2 += A[j*lda+k]*B[k*lda+i+2]; - Cval3 += A[j*lda+k]*B[k*lda+i+3]; - } - } else { - for(k = lda-1; k >= 0; k--) { - Cval0 += A[j*lda+k]*B[k*lda+i]; - Cval1 += A[j*lda+k]*B[k*lda+i+1]; - Cval2 += A[j*lda+k]*B[k*lda+i+2]; - Cval3 += A[j*lda+k]*B[k*lda+i+3]; - } - } - C[j*lda+i] = Cval0; - C[j*lda+i+1] = Cval1; - C[j*lda+i+2] = Cval2; - C[j*lda+i+3] = Cval3; - } - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ai_vvadd/ai_vvadd.c b/mt/ai_vvadd/ai_vvadd.c deleted file mode 100755 index 64d1774..0000000 --- a/mt/ai_vvadd/ai_vvadd.c +++ /dev/null @@ -1,170 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // 
***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - size_t i; - - for (i = coreid*n/ncores; i < coreid*n/ncores + n/ncores; i++) { - x[i] = x[i] + y[i]; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ai_vvadd/dataset.h b/mt/ai_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/ai_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 
8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 
15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 
9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 
15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 
12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 
2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 
6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 
21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 
21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 
14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 
24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/ai_vvadd/vvadd_gendata.pl b/mt/ai_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/ai_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - 
} - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/aj_matmul/aj_matmul.c b/mt/aj_matmul/aj_matmul.c deleted file mode 100755 index 445f924..0000000 --- a/mt/aj_matmul/aj_matmul.c +++ /dev/null @@ -1,380 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -void matrix_sub(int size, data_t A[], data_t B[], data_t C[]) { - if (coreid != 0) - return; - - for(int i = 0; i < size; i++){ - C[i] = A[i] + B[i]; - } -} - -void matrix_add(int size, data_t A[], data_t B[], data_t C[]) { - if (coreid != 0) - return; - - for(int i = 0; i < size; i++){ - C[i] = A[i] - B[i]; - } -} - -void strassen_mult(int dime, const data_t sA[], const data_t sB[], data_t sC[]) { - - if (coreid != 0) - 
return; - - int height, width; - int sub_size = dime*dime/4; - -// data_t A_11[sub_size], B_11[sub_size], C_11[sub_size], -// A_12[sub_size], B_12[sub_size], C_12[sub_size], -// A_21[sub_size], B_21[sub_size], C_21[sub_size], -// A_22[sub_size], B_22[sub_size], C_22[sub_size]; - - data_t *A_11 = malloc(sub_size*sizeof(data_t)); - data_t *A_12 = malloc(sub_size*sizeof(data_t)); - data_t *A_21 = malloc(sub_size*sizeof(data_t)); - data_t *A_22 = malloc(sub_size*sizeof(data_t)); - data_t *B_11 = malloc(sub_size*sizeof(data_t)); - data_t *B_12 = malloc(sub_size*sizeof(data_t)); - data_t *B_21 = malloc(sub_size*sizeof(data_t)); - data_t *B_22 = malloc(sub_size*sizeof(data_t)); - - for(height=0; height < dime/2; height++) { - for(width= 0; width < dime/2; width++) { - A_11[width+(height*dime/2)] = sA[width + height*dime]; - B_11[width+(height*dime/2)] = sB[width + height*dime]; - - A_12[width+(height*dime/2)] = sA[dime/2 + width + height*dime]; - B_12[width+(height*dime/2)] = sB[dime/2 + width + height*dime]; - - A_21[width+(height*dime/2)] = sA[(dime*dime)/2 + width + height*dime]; - B_21[width+(height*dime/2)] = sB[(dime*dime)/2 + width + height*dime]; - - A_22[width+(height*dime/2)] = sA[(dime*dime)/2 + dime/2 + width + height*dime]; - B_22[width+(height*dime/2)] = sB[(dime*dime)/2 + dime/2 + width + height*dime]; - } - } - -// data_t H_1[sub_size], H_2[sub_size], H_3[sub_size], H_4[sub_size], H_5[sub_size], -// H_6[sub_size], H_7[sub_size], H_8[sub_size], H_9[sub_size], H_10[sub_size], -// H_11[sub_size], H_12[sub_size], H_13[sub_size], H_14[sub_size], -// H_15[sub_size], H_16[sub_size], H_17[sub_size], H_18[sub_size]; - - data_t *H_1 = malloc(sub_size*sizeof(data_t)); - data_t *H_2 = malloc(sub_size*sizeof(data_t)); - data_t *H_3 = malloc(sub_size*sizeof(data_t)); - data_t *H_4 = malloc(sub_size*sizeof(data_t)); - data_t *H_5 = malloc(sub_size*sizeof(data_t)); - data_t *H_6 = malloc(sub_size*sizeof(data_t)); - data_t *H_7 = malloc(sub_size*sizeof(data_t)); - data_t 
*H_8 = malloc(sub_size*sizeof(data_t)); - data_t *H_9 = malloc(sub_size*sizeof(data_t)); - data_t *H_10 = malloc(sub_size*sizeof(data_t)); - - matrix_add(sub_size, A_11, A_22, H_1); //Helper1 - matrix_add(sub_size, B_11, B_22, H_2); //Helper2 - matrix_add(sub_size, A_21, A_22, H_3); //Helper3 - matrix_sub(sub_size, B_12, B_22, H_4); //Helper4 - matrix_sub(sub_size, B_21, B_11, H_5); //Helper5 - matrix_add(sub_size, A_11, A_12, H_6); //Helper6 - matrix_sub(sub_size, A_21, A_11, H_7); //Helper7 - matrix_add(sub_size, B_11, B_12, H_8); //Helper8 - matrix_sub(sub_size, A_12, A_22, H_9); //Helper9 - matrix_add(sub_size, B_21, B_22, H_10); //Helper10 - - free(A_12); - free(A_21); - free(B_12); - free(B_21); - - A_12 = NULL; - A_21 = NULL; - B_12 = NULL; - B_21 = NULL; - -// data_t M_1[sub_size], M_2[sub_size], M_3[sub_size], M_4[sub_size], -// M_5[sub_size], M_6[sub_size], M_7[sub_size]; - - data_t *M_1 = malloc(sub_size*sizeof(data_t)); - data_t *M_2 = malloc(sub_size*sizeof(data_t)); - data_t *M_3 = malloc(sub_size*sizeof(data_t)); - data_t *M_4 = malloc(sub_size*sizeof(data_t)); - data_t *M_5 = malloc(sub_size*sizeof(data_t)); - data_t *M_6 = malloc(sub_size*sizeof(data_t)); - data_t *M_7 = malloc(sub_size*sizeof(data_t)); - - if (sub_size == 1) { - M_1[0] = H_1[0]*H_2[0]; - M_2[0] = H_3[0]*B_11[0]; - M_3[0] = A_11[0]*H_4[0]; - M_4[0] = A_22[0]*H_5[0]; - M_5[0] = H_6[0]*B_22[0]; - M_6[0] = H_7[0]*H_8[0]; - M_7[0] = H_9[0]*H_10[0]; - } else { - strassen_mult(dime/2, H_1, H_2, M_1); - strassen_mult(dime/2, H_3, B_11, M_2); - strassen_mult(dime/2, A_11, H_4, M_3); - strassen_mult(dime/2, A_22, H_5, M_4); - strassen_mult(dime/2, H_6, B_22, M_5); - strassen_mult(dime/2, H_7, H_8, M_6); - strassen_mult(dime/2, H_9, H_10, M_7); - } - - free(A_11); - free(A_22); - free(B_11); - free(B_22); - - A_11 = NULL; - A_22 = NULL; - B_11 = NULL; - B_22 = NULL; - - free(H_1); - free(H_2); - free(H_3); - free(H_4); - free(H_5); - free(H_6); - free(H_7); - free(H_8); - free(H_9); - 
free(H_10); - - H_1 = NULL; - H_2 = NULL; - H_3 = NULL; - H_4 = NULL; - H_5 = NULL; - H_6 = NULL; - H_7 = NULL; - H_8 = NULL; - H_9 = NULL; - H_10 = NULL; - - data_t *H_11 = malloc(sub_size*sizeof(data_t)); - data_t *H_12 = malloc(sub_size*sizeof(data_t)); - data_t *H_13 = malloc(sub_size*sizeof(data_t)); - data_t *H_14 = malloc(sub_size*sizeof(data_t)); - - data_t *C_11 = malloc(sub_size*sizeof(data_t)); - data_t *C_12 = malloc(sub_size*sizeof(data_t)); - data_t *C_21 = malloc(sub_size*sizeof(data_t)); - data_t *C_22 = malloc(sub_size*sizeof(data_t)); - - matrix_add(sub_size, M_1, M_4, H_11); - matrix_add(sub_size, M_5, M_7, H_12); - matrix_sub(sub_size, H_11, H_12, C_11); - - matrix_add(sub_size, M_3, M_5, C_12); - - matrix_add(sub_size, M_2, M_4, C_21); - - matrix_sub(sub_size, M_1, M_2, H_13); - matrix_add(sub_size, M_3, M_6, H_14); - matrix_add(sub_size, H_13, H_14, C_22); - - free(H_11); - free(H_12); - free(H_13); - free(H_14); - - H_11 = NULL; - H_12 = NULL; - H_13 = NULL; - H_14 = NULL; - - - for(height=0; height < dime/2; height++) { - for(width= 0; width < dime/2; width++) { - sC[width + height*dime] = C_11[width+(height*dime/2)]; - sC[dime/2 + width + height*dime] = C_12[width+(height*dime/2)]; - sC[(dime*dime)/2 + width + height*dime] = C_21[width+(height*dime/2)]; - sC[(dime*dime)/2 + dime/2 + width + height*dime] = C_22[width+(height*dime/2)]; - } - } - - free(C_11); - free(C_12); - free(C_21); - free(C_22); - - C_11 = NULL; - C_12 = NULL; - C_21 = NULL; - C_22 = NULL; - -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void 
__attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - - if (coreid > 0) - return; - - strassen_mult(lda, A, B, C); - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/aj_matmul/dataset.h b/mt/aj_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/aj_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 
3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 
1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 
2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 
3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 
73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 
95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/aj_matmul/matmul_gendata.pl b/mt/aj_matmul/matmul_gendata.pl deleted file mode 100755 index f21bb46..0000000 --- a/mt/aj_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. 
-# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - 
-#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. ($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." 
\n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/aj_matmul/matmul_mi.c b/mt/aj_matmul/matmul_mi.c deleted file mode 100644 index 445f924..0000000 --- a/mt/aj_matmul/matmul_mi.c +++ /dev/null @@ -1,380 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if 
(coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -void matrix_sub(int size, data_t A[], data_t B[], data_t C[]) { - if (coreid != 0) - return; - - for(int i = 0; i < size; i++){ - C[i] = A[i] + B[i]; - } -} - -void matrix_add(int size, data_t A[], data_t B[], data_t C[]) { - if (coreid != 0) - return; - - for(int i = 0; i < size; i++){ - C[i] = A[i] - B[i]; - } -} - -void strassen_mult(int dime, const data_t sA[], const data_t sB[], data_t sC[]) { - - if (coreid != 0) - return; - - int height, width; - int sub_size = dime*dime/4; - -// data_t A_11[sub_size], B_11[sub_size], C_11[sub_size], -// A_12[sub_size], B_12[sub_size], C_12[sub_size], -// A_21[sub_size], B_21[sub_size], C_21[sub_size], -// A_22[sub_size], B_22[sub_size], C_22[sub_size]; - - data_t *A_11 = malloc(sub_size*sizeof(data_t)); - data_t *A_12 = malloc(sub_size*sizeof(data_t)); - data_t *A_21 = malloc(sub_size*sizeof(data_t)); - data_t *A_22 = malloc(sub_size*sizeof(data_t)); - data_t *B_11 = malloc(sub_size*sizeof(data_t)); - data_t *B_12 = malloc(sub_size*sizeof(data_t)); - data_t *B_21 = malloc(sub_size*sizeof(data_t)); - data_t *B_22 = malloc(sub_size*sizeof(data_t)); - - for(height=0; height < dime/2; height++) { - for(width= 0; width < dime/2; width++) { - A_11[width+(height*dime/2)] = sA[width + height*dime]; - B_11[width+(height*dime/2)] = sB[width + height*dime]; - - A_12[width+(height*dime/2)] = sA[dime/2 + width + height*dime]; - B_12[width+(height*dime/2)] = sB[dime/2 + width + height*dime]; - - A_21[width+(height*dime/2)] = sA[(dime*dime)/2 + width + height*dime]; 
- B_21[width+(height*dime/2)] = sB[(dime*dime)/2 + width + height*dime]; - - A_22[width+(height*dime/2)] = sA[(dime*dime)/2 + dime/2 + width + height*dime]; - B_22[width+(height*dime/2)] = sB[(dime*dime)/2 + dime/2 + width + height*dime]; - } - } - -// data_t H_1[sub_size], H_2[sub_size], H_3[sub_size], H_4[sub_size], H_5[sub_size], -// H_6[sub_size], H_7[sub_size], H_8[sub_size], H_9[sub_size], H_10[sub_size], -// H_11[sub_size], H_12[sub_size], H_13[sub_size], H_14[sub_size], -// H_15[sub_size], H_16[sub_size], H_17[sub_size], H_18[sub_size]; - - data_t *H_1 = malloc(sub_size*sizeof(data_t)); - data_t *H_2 = malloc(sub_size*sizeof(data_t)); - data_t *H_3 = malloc(sub_size*sizeof(data_t)); - data_t *H_4 = malloc(sub_size*sizeof(data_t)); - data_t *H_5 = malloc(sub_size*sizeof(data_t)); - data_t *H_6 = malloc(sub_size*sizeof(data_t)); - data_t *H_7 = malloc(sub_size*sizeof(data_t)); - data_t *H_8 = malloc(sub_size*sizeof(data_t)); - data_t *H_9 = malloc(sub_size*sizeof(data_t)); - data_t *H_10 = malloc(sub_size*sizeof(data_t)); - - matrix_add(sub_size, A_11, A_22, H_1); //Helper1 - matrix_add(sub_size, B_11, B_22, H_2); //Helper2 - matrix_add(sub_size, A_21, A_22, H_3); //Helper3 - matrix_sub(sub_size, B_12, B_22, H_4); //Helper4 - matrix_sub(sub_size, B_21, B_11, H_5); //Helper5 - matrix_add(sub_size, A_11, A_12, H_6); //Helper6 - matrix_sub(sub_size, A_21, A_11, H_7); //Helper7 - matrix_add(sub_size, B_11, B_12, H_8); //Helper8 - matrix_sub(sub_size, A_12, A_22, H_9); //Helper9 - matrix_add(sub_size, B_21, B_22, H_10); //Helper10 - - free(A_12); - free(A_21); - free(B_12); - free(B_21); - - A_12 = NULL; - A_21 = NULL; - B_12 = NULL; - B_21 = NULL; - -// data_t M_1[sub_size], M_2[sub_size], M_3[sub_size], M_4[sub_size], -// M_5[sub_size], M_6[sub_size], M_7[sub_size]; - - data_t *M_1 = malloc(sub_size*sizeof(data_t)); - data_t *M_2 = malloc(sub_size*sizeof(data_t)); - data_t *M_3 = malloc(sub_size*sizeof(data_t)); - data_t *M_4 = malloc(sub_size*sizeof(data_t)); - 
data_t *M_5 = malloc(sub_size*sizeof(data_t)); - data_t *M_6 = malloc(sub_size*sizeof(data_t)); - data_t *M_7 = malloc(sub_size*sizeof(data_t)); - - if (sub_size == 1) { - M_1[0] = H_1[0]*H_2[0]; - M_2[0] = H_3[0]*B_11[0]; - M_3[0] = A_11[0]*H_4[0]; - M_4[0] = A_22[0]*H_5[0]; - M_5[0] = H_6[0]*B_22[0]; - M_6[0] = H_7[0]*H_8[0]; - M_7[0] = H_9[0]*H_10[0]; - } else { - strassen_mult(dime/2, H_1, H_2, M_1); - strassen_mult(dime/2, H_3, B_11, M_2); - strassen_mult(dime/2, A_11, H_4, M_3); - strassen_mult(dime/2, A_22, H_5, M_4); - strassen_mult(dime/2, H_6, B_22, M_5); - strassen_mult(dime/2, H_7, H_8, M_6); - strassen_mult(dime/2, H_9, H_10, M_7); - } - - free(A_11); - free(A_22); - free(B_11); - free(B_22); - - A_11 = NULL; - A_22 = NULL; - B_11 = NULL; - B_22 = NULL; - - free(H_1); - free(H_2); - free(H_3); - free(H_4); - free(H_5); - free(H_6); - free(H_7); - free(H_8); - free(H_9); - free(H_10); - - H_1 = NULL; - H_2 = NULL; - H_3 = NULL; - H_4 = NULL; - H_5 = NULL; - H_6 = NULL; - H_7 = NULL; - H_8 = NULL; - H_9 = NULL; - H_10 = NULL; - - data_t *H_11 = malloc(sub_size*sizeof(data_t)); - data_t *H_12 = malloc(sub_size*sizeof(data_t)); - data_t *H_13 = malloc(sub_size*sizeof(data_t)); - data_t *H_14 = malloc(sub_size*sizeof(data_t)); - - data_t *C_11 = malloc(sub_size*sizeof(data_t)); - data_t *C_12 = malloc(sub_size*sizeof(data_t)); - data_t *C_21 = malloc(sub_size*sizeof(data_t)); - data_t *C_22 = malloc(sub_size*sizeof(data_t)); - - matrix_add(sub_size, M_1, M_4, H_11); - matrix_add(sub_size, M_5, M_7, H_12); - matrix_sub(sub_size, H_11, H_12, C_11); - - matrix_add(sub_size, M_3, M_5, C_12); - - matrix_add(sub_size, M_2, M_4, C_21); - - matrix_sub(sub_size, M_1, M_2, H_13); - matrix_add(sub_size, M_3, M_6, H_14); - matrix_add(sub_size, H_13, H_14, C_22); - - free(H_11); - free(H_12); - free(H_13); - free(H_14); - - H_11 = NULL; - H_12 = NULL; - H_13 = NULL; - H_14 = NULL; - - - for(height=0; height < dime/2; height++) { - for(width= 0; width < dime/2; width++) 
{ - sC[width + height*dime] = C_11[width+(height*dime/2)]; - sC[dime/2 + width + height*dime] = C_12[width+(height*dime/2)]; - sC[(dime*dime)/2 + width + height*dime] = C_21[width+(height*dime/2)]; - sC[(dime*dime)/2 + dime/2 + width + height*dime] = C_22[width+(height*dime/2)]; - } - } - - free(C_11); - free(C_12); - free(C_21); - free(C_22); - - C_11 = NULL; - C_12 = NULL; - C_21 = NULL; - C_22 = NULL; - -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - - if (coreid > 0) - return; - - strassen_mult(lda, A, B, C); - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/aj_vvadd/aj_vvadd.c b/mt/aj_vvadd/aj_vvadd.c deleted file mode 100755 index 0096209..0000000 --- a/mt/aj_vvadd/aj_vvadd.c +++ /dev/null @@ -1,168 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - 
- for (i = (n/ncores)*coreid; i < (n/ncores)*(coreid+1); i++) - { - x[i] = x[i] + y[i]; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/aj_vvadd/dataset.h b/mt/aj_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/aj_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 
1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 
0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 
5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 
4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 
16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 
0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 
8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 
9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 
14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 
15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git 
a/mt/aj_vvadd/vvadd_gendata.pl b/mt/aj_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/aj_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( 
my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/ak_matmul.c b/mt/ak_matmul.c new file mode 100755 index 0000000..e4b34e4 --- /dev/null +++ b/mt/ak_matmul.c @@ -0,0 +1,62 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + // ***************************** // + // **** ADD YOUR CODE HERE ***** // + // ***************************** // + // + // feel free to make a separate function for MI and MSI versions. 
+ int i, j, k, ii, jj, bsize; + bsize = 16; + for ( jj = bsize*coreid; jj < lda; jj += bsize*ncores) { + for ( ii = 0; ii < lda; ii += bsize) { + for ( j = jj; j < lda && j < jj + bsize; j++) { + for ( i = ii; i < lda && i < ii + bsize; i += 8) { + data_t c1 = C[i + j*lda]; + data_t c2 = C[i + j*lda + 1]; + data_t c3 = C[i + j*lda + 2]; + data_t c4 = C[i + j*lda + 3]; + data_t c5 = C[i + j*lda + 4]; + data_t c6 = C[i + j*lda + 5]; + data_t c7 = C[i + j*lda + 6]; + data_t c8 = C[i + j*lda + 7]; + for ( k = 0; k < lda; k+=4 ) { + for (int x = 0; x < 4; x++) { + data_t a = A[j*lda + k+x]; + data_t b1 = B[(k+x)*lda + i]; + data_t b2 = B[(k+x)*lda + i + 1]; + data_t b3 = B[(k+x)*lda + i + 2]; + data_t b4 = B[(k+x)*lda + i + 3]; + data_t b5 = B[(k+x)*lda + i + 4]; + data_t b6 = B[(k+x)*lda + i + 5]; + data_t b7 = B[(k+x)*lda + i + 6]; + data_t b8 = B[(k+x)*lda + i + 7]; + c1 += a * b1; + c2 += a * b2; + c3 += a * b3; + c4 += a * b4; + c5 += a * b5; + c6 += a * b6; + c7 += a * b7; + c8 += a * b8; + } + } + C[i + j*lda] = c1; + C[i + j*lda + 1] = c2; + C[i + j*lda + 2] = c3; + C[i + j*lda + 3] = c4; + C[i + j*lda + 4] = c5; + C[i + j*lda + 5] = c6; + C[i + j*lda + 6] = c7; + C[i + j*lda + 7] = c8; + } + } + } + } + +} diff --git a/mt/ak_matmul/ak_matmul.c b/mt/ak_matmul/ak_matmul.c deleted file mode 100755 index 4d803ee..0000000 --- a/mt/ak_matmul/ak_matmul.c +++ /dev/null @@ -1,213 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda 
+ i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - int i, j, k, ii, jj, bsize; - bsize = 16; - for ( jj = bsize*coreid; jj < lda; jj += bsize*ncores) { - for ( ii = 0; ii < lda; ii += bsize) { - for ( j = jj; j < lda && j < jj + bsize; j++) { - for ( i = ii; i < lda && i < ii + bsize; i += 8) { - data_t c1 = C[i + j*lda]; - data_t c2 = C[i + j*lda + 1]; - data_t c3 = C[i + j*lda + 2]; - data_t c4 = C[i + j*lda + 3]; - data_t c5 = C[i + j*lda + 4]; - data_t c6 = C[i + j*lda + 5]; - data_t c7 = C[i + j*lda + 6]; - data_t c8 = C[i + j*lda + 7]; - for ( k = 0; k < lda; k+=4 ) { - for (int x = 0; x < 4; x++) { - data_t a = A[j*lda + k+x]; - data_t b1 = B[(k+x)*lda + i]; - data_t b2 = B[(k+x)*lda + i + 1]; - data_t b3 = B[(k+x)*lda + i + 2]; - data_t b4 = B[(k+x)*lda + i + 3]; - data_t b5 = B[(k+x)*lda + i + 4]; - data_t b6 = B[(k+x)*lda + i + 5]; - data_t b7 = B[(k+x)*lda + i + 6]; - data_t b8 = B[(k+x)*lda + i + 7]; - c1 += a * b1; - c2 += a * b2; - c3 += a * b3; - c4 += a * b4; - c5 += a * b5; - c6 += a * b6; - c7 += a * b7; - c8 += a * b8; - } - } - C[i + j*lda] = c1; - C[i + j*lda + 1] = c2; - C[i + j*lda + 2] = c3; - C[i + j*lda + 3] = c4; - C[i + j*lda + 4] = c5; - C[i + j*lda + 5] = c6; - C[i + j*lda + 6] = c7; - C[i + j*lda + 7] = c8; - } - } - } - } - -} -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ak_matmul/dataset.h b/mt/ak_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/ak_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 
3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 
0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 
0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 
68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 
61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/ak_matmul/matmulMI.c b/mt/ak_matmul/matmulMI.c deleted file mode 100755 index 
e731501..0000000 --- a/mt/ak_matmul/matmulMI.c +++ /dev/null @@ -1,212 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) 
- { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - int i, j, k, ii, jj, bsize, start; - bsize = 16; - start = bsize*coreid; - for ( jj = start; jj < lda; jj += bsize*ncores) { - int first = 1; - for ( ii = start; ii !=start || first; ii=(bsize+ii) % lda) { - first = 0; - for ( j = jj; j < lda && j < jj + bsize; j+=4) { - for ( i = ii; i < lda && i < ii + bsize; i+=2) { - data_t c1 = C[i + j*lda]; - data_t c2 = C[i + j*lda + 1]; - data_t c3 = C[i + (j+1)*lda]; - data_t c4 = C[i + (j+1)*lda + 1]; - data_t c5 = C[i + (j+2)*lda]; - data_t c6 = C[i + (j+2)*lda + 1]; - data_t c7 = C[i + (j+3)*lda]; - data_t c8 = C[i + (j+3)*lda + 1]; - for ( k = 0; k < lda; k+=8){ - for (int x = 0; x < 8; x++) { - data_t a = A[j*lda + k+x]; - data_t a1 = A[(j+1)*lda +k+x]; - data_t a2 = A[(j+2)*lda +k+x]; - data_t a3 = A[(j+3)*lda +k+x]; - data_t b1 = B[(k+x)*lda + i]; - data_t b2 = B[(k+x)*lda + i + 1]; - c1 += a * b1; - c2 += a * b2; - c3 += a1* b1; - c4 += a1* b2; - c5 += a2* b1; - c6 += a2* b2; - c7 += a3* b1; - c8 += a3* b2; - } - } - C[i + j*lda] = c1; - C[i + j*lda + 1] = c2; - C[i + (j+1)*lda] = c3; - C[i + (j+1)*lda + 1] = 
c4; - C[i + (j+2)*lda] = c5; - C[i + (j+2)*lda + 1] = c6; - C[i + (j+3)*lda] = c7; - C[i + (j+3)*lda + 1] = c8; - } - } - } - } -} -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - - // Execute the provided, naive matmul - barrier(nc); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - - // clear results from the first trial - size_t i; - if (coreid == 0) - for (i=0; i < ARRAY_SIZE; i++) - results_data[i] = 0; - barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ak_matmul/matmul_gendata.pl b/mt/ak_matmul/matmul_gendata.pl deleted file mode 100755 index f21bb46..0000000 --- a/mt/ak_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. 
-# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - 
-#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. ($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." 
\n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/ak_matmul/matmul_mi.c b/mt/ak_matmul/matmul_mi.c deleted file mode 100755 index aa41e32..0000000 --- a/mt/ak_matmul/matmul_mi.c +++ /dev/null @@ -1,212 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if 
(coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. 
- int i, j, k, ii, jj, bsize, start; - bsize = 16; - start = bsize*coreid; - for ( jj = start; jj < lda; jj += bsize*ncores) { - int first = 1; - for ( ii = start; ii !=start || first; ii=(bsize+ii) % lda) { - first = 0; - for ( j = jj; j < lda && j < jj + bsize; j+=4) { - for ( i = ii; i < lda && i < ii + bsize; i+=2) { - data_t c1 = C[i + j*lda]; - data_t c2 = C[i + j*lda + 1]; - data_t c3 = C[i + (j+1)*lda]; - data_t c4 = C[i + (j+1)*lda + 1]; - data_t c5 = C[i + (j+2)*lda]; - data_t c6 = C[i + (j+2)*lda + 1]; - data_t c7 = C[i + (j+3)*lda]; - data_t c8 = C[i + (j+3)*lda + 1]; - for ( k = 0; k < lda; k+=8){ - for (int x = 0; x < 8; x++) { - data_t a = A[j*lda + k+x]; - data_t a1 = A[(j+1)*lda +k+x]; - data_t a2 = A[(j+2)*lda +k+x]; - data_t a3 = A[(j+3)*lda +k+x]; - data_t b1 = B[(k+x)*lda + i]; - data_t b2 = B[(k+x)*lda + i + 1]; - c1 += a * b1; - c2 += a * b2; - c3 += a1* b1; - c4 += a1* b2; - c5 += a2* b1; - c6 += a2* b2; - c7 += a3* b1; - c8 += a3* b2; - } - } - C[i + j*lda] = c1; - C[i + j*lda + 1] = c2; - C[i + (j+1)*lda] = c3; - C[i + (j+1)*lda + 1] = c4; - C[i + (j+2)*lda] = c5; - C[i + (j+2)*lda + 1] = c6; - C[i + (j+3)*lda] = c7; - C[i + (j+3)*lda + 1] = c8; - } - } - } - } -} -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ak_vvadd/ak_vvadd.c b/mt/ak_vvadd/ak_vvadd.c deleted file mode 100755 index ee82f19..0000000 --- a/mt/ak_vvadd/ak_vvadd.c +++ /dev/null @@ -1,171 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - 
size_t size; - size_t start; - size = n / ncores; - start = coreid*size; - - for (i = start; (i < size + start) && i < n; i++) { - x[i] = x[i] + y[i]; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ak_vvadd/dataset.h b/mt/ak_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/ak_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 
17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 
15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 
11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 
1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 
2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 
14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 
16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 
32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 
21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 
26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 
21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/ak_vvadd/vvadd_gendata.pl b/mt/ak_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/ak_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( 
my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." 
\n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/al_matmul.c b/mt/al_matmul.c new file mode 100644 index 0000000..e5ee410 --- /dev/null +++ b/mt/al_matmul.c @@ -0,0 +1,123 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + int i, j, k, x; + data_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; + data_t temp8, temp9, temp10, temp11, temp12, temp13, temp14, temp15; + + //complete Q1 + if(coreid > 1) return; + if(coreid == 0) { + for(j = 0; j < 32; j++) { + temp0 = C[j*lda]; + temp1 = C[1 + j*lda]; + temp2 = C[2 + j*lda]; + temp3 = C[3 + j*lda]; + temp4 = C[4 + j*lda]; + temp5 = C[5 + j*lda]; + temp6 = C[6 + j*lda]; + temp7 = C[7 + j*lda]; + temp8 = C[8 + j*lda]; + temp9 = C[9 + j*lda]; + temp10 = C[10 + j*lda]; + temp11 = C[11 + j*lda]; + temp12 = C[12 + j*lda]; + temp13 = C[13 + j*lda]; + temp14 = C[14 + j*lda]; + temp15 = C[15 + j*lda]; + for(k = 0; k < 32; k++) { + temp0 += A[j*lda + k] * B[k*lda]; + temp1 += A[j*lda + k] * B[1+k*lda]; + temp2 += A[j*lda + k] * B[2+k*lda]; + temp3 += A[j*lda + k] * B[3+k*lda]; + temp4 += A[j*lda + k] * B[4+k*lda]; + temp5 += A[j*lda + k] * B[5+k*lda]; + temp6 += A[j*lda + k] * B[6+k*lda]; + temp7 += A[j*lda + k] * B[7+k*lda]; + temp8 += A[j*lda + k] * B[8+k*lda]; + temp9 += A[j*lda + k] * B[9+k*lda]; + temp10 += A[j*lda + k] * B[10+k*lda]; + temp11 += A[j*lda + k] * B[11+k*lda]; + temp12 += A[j*lda + k] * B[12+k*lda]; + temp13 += A[j*lda + k] * B[13+k*lda]; + temp14 += A[j*lda + k] * B[14+k*lda]; + temp15 += A[j*lda + k] * B[15+k*lda]; + } + C[j*lda] = temp0; + C[1 + j*lda] = temp1; + C[2 + j*lda] = temp2; + C[3 + j*lda] = temp3; + C[4 + j*lda] = temp4; + C[5 + j*lda] = temp5; + C[6 + j*lda] = temp6; + C[7 + j*lda] = temp7; + C[8 + j*lda] = temp8; + 
C[9 + j*lda] = temp9; + C[10 + j*lda] = temp10; + C[11 + j*lda] = temp11; + C[12 + j*lda] = temp12; + C[13 + j*lda] = temp13; + C[14 + j*lda] = temp14; + C[15 + j*lda] = temp15; + } + } + + if( coreid == 1 || ncores == 1) { + for(j = 0; j < 32; j++) { + temp0 = C[16 + j*lda]; + temp1 = C[17 + j*lda]; + temp2 = C[18 + j*lda]; + temp3 = C[19 + j*lda]; + temp4 = C[20 + j*lda]; + temp5 = C[21 + j*lda]; + temp6 = C[22 + j*lda]; + temp7 = C[23 + j*lda]; + temp8 = C[24 + j*lda]; + temp9 = C[25 + j*lda]; + temp10 = C[26 + j*lda]; + temp11 = C[27 + j*lda]; + temp12 = C[28 + j*lda]; + temp13 = C[29 + j*lda]; + temp14 = C[30 + j*lda]; + temp15 = C[31 + j*lda]; + for(k = 0; k < 32; k++) { + temp0 += A[j*lda + k] * B[16 + k*lda]; + temp1 += A[j*lda + k] * B[17 + k*lda]; + temp2 += A[j*lda + k] * B[18 + k*lda]; + temp3 += A[j*lda + k] * B[19 + k*lda]; + temp4 += A[j*lda + k] * B[20 + k*lda]; + temp5 += A[j*lda + k] * B[21 + k*lda]; + temp6 += A[j*lda + k] * B[22 + k*lda]; + temp7 += A[j*lda + k] * B[23 + k*lda]; + temp8 += A[j*lda + k] * B[24 + k*lda]; + temp9 += A[j*lda + k] * B[25 + k*lda]; + temp10 += A[j*lda + k] * B[26 + k*lda]; + temp11 += A[j*lda + k] * B[27 + k*lda]; + temp12 += A[j*lda + k] * B[28 + k*lda]; + temp13 += A[j*lda + k] * B[29 + k*lda]; + temp14 += A[j*lda + k] * B[30 + k*lda]; + temp15 += A[j*lda + k] * B[31 + k*lda]; + } + C[16 + j*lda] = temp0; + C[17 + j*lda] = temp1; + C[18 + j*lda] = temp2; + C[19 + j*lda] = temp3; + C[20 + j*lda] = temp4; + C[21 + j*lda] = temp5; + C[22 + j*lda] = temp6; + C[23 + j*lda] = temp7; + C[24 + j*lda] = temp8; + C[25 + j*lda] = temp9; + C[26 + j*lda] = temp10; + C[27 + j*lda] = temp11; + C[28 + j*lda] = temp12; + C[29 + j*lda] = temp13; + C[30 + j*lda] = temp14; + C[31 + j*lda] = temp15; + } + } +} diff --git a/mt/al_matmul/al_matmul.c b/mt/al_matmul/al_matmul.c deleted file mode 100644 index 13bc501..0000000 --- a/mt/al_matmul/al_matmul.c +++ /dev/null @@ -1,273 +0,0 @@ 
-//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, 
correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k, x; - data_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; - data_t temp8, temp9, temp10, temp11, temp12, temp13, temp14, temp15; - - //complete Q1 - if(coreid == 0) { - for(j = 0; j < 32; j++) { - temp0 = C[j*lda]; - temp1 = C[1 + j*lda]; - temp2 = C[2 + j*lda]; - temp3 = C[3 + j*lda]; - temp4 = C[4 + j*lda]; - temp5 = C[5 + j*lda]; - temp6 = C[6 + j*lda]; - temp7 = C[7 + j*lda]; - temp8 = C[8 + j*lda]; - temp9 = C[9 + j*lda]; - temp10 = C[10 + j*lda]; - temp11 = C[11 + j*lda]; - temp12 = C[12 + j*lda]; - temp13 = C[13 + j*lda]; - temp14 = C[14 + j*lda]; - temp15 = C[15 + j*lda]; - for(k = 0; k < 32; k++) { - temp0 += A[j*lda + k] * B[k*lda]; - temp1 += A[j*lda + k] * B[1+k*lda]; - temp2 += A[j*lda + k] * B[2+k*lda]; - temp3 += A[j*lda + k] * B[3+k*lda]; - temp4 += A[j*lda + k] * B[4+k*lda]; - temp5 += A[j*lda + k] * B[5+k*lda]; - temp6 += A[j*lda + k] * B[6+k*lda]; - temp7 += A[j*lda + k] * B[7+k*lda]; - temp8 += A[j*lda + k] * B[8+k*lda]; - temp9 += A[j*lda + k] * B[9+k*lda]; - temp10 += A[j*lda + k] * B[10+k*lda]; - temp11 += A[j*lda + k] * B[11+k*lda]; - temp12 += A[j*lda + k] * B[12+k*lda]; - temp13 += A[j*lda + k] * B[13+k*lda]; - temp14 += A[j*lda + k] * B[14+k*lda]; - temp15 += A[j*lda + k] * B[15+k*lda]; - } - C[j*lda] = temp0; - C[1 + j*lda] = temp1; - C[2 + j*lda] = temp2; - C[3 + 
j*lda] = temp3; - C[4 + j*lda] = temp4; - C[5 + j*lda] = temp5; - C[6 + j*lda] = temp6; - C[7 + j*lda] = temp7; - C[8 + j*lda] = temp8; - C[9 + j*lda] = temp9; - C[10 + j*lda] = temp10; - C[11 + j*lda] = temp11; - C[12 + j*lda] = temp12; - C[13 + j*lda] = temp13; - C[14 + j*lda] = temp14; - C[15 + j*lda] = temp15; - } - } - - else { - for(j = 0; j < 32; j++) { - temp0 = C[16 + j*lda]; - temp1 = C[17 + j*lda]; - temp2 = C[18 + j*lda]; - temp3 = C[19 + j*lda]; - temp4 = C[20 + j*lda]; - temp5 = C[21 + j*lda]; - temp6 = C[22 + j*lda]; - temp7 = C[23 + j*lda]; - temp8 = C[24 + j*lda]; - temp9 = C[25 + j*lda]; - temp10 = C[26 + j*lda]; - temp11 = C[27 + j*lda]; - temp12 = C[28 + j*lda]; - temp13 = C[29 + j*lda]; - temp14 = C[30 + j*lda]; - temp15 = C[31 + j*lda]; - for(k = 0; k < 32; k++) { - temp0 += A[j*lda + k] * B[16 + k*lda]; - temp1 += A[j*lda + k] * B[17 + k*lda]; - temp2 += A[j*lda + k] * B[18 + k*lda]; - temp3 += A[j*lda + k] * B[19 + k*lda]; - temp4 += A[j*lda + k] * B[20 + k*lda]; - temp5 += A[j*lda + k] * B[21 + k*lda]; - temp6 += A[j*lda + k] * B[22 + k*lda]; - temp7 += A[j*lda + k] * B[23 + k*lda]; - temp8 += A[j*lda + k] * B[24 + k*lda]; - temp9 += A[j*lda + k] * B[25 + k*lda]; - temp10 += A[j*lda + k] * B[26 + k*lda]; - temp11 += A[j*lda + k] * B[27 + k*lda]; - temp12 += A[j*lda + k] * B[28 + k*lda]; - temp13 += A[j*lda + k] * B[29 + k*lda]; - temp14 += A[j*lda + k] * B[30 + k*lda]; - temp15 += A[j*lda + k] * B[31 + k*lda]; - } - C[16 + j*lda] = temp0; - C[17 + j*lda] = temp1; - C[18 + j*lda] = temp2; - C[19 + j*lda] = temp3; - C[20 + j*lda] = temp4; - C[21 + j*lda] = temp5; - C[22 + j*lda] = temp6; - C[23 + j*lda] = temp7; - C[24 + j*lda] = temp8; - C[25 + j*lda] = temp9; - C[26 + j*lda] = temp10; - C[27 + j*lda] = temp11; - C[28 + j*lda] = temp12; - C[29 + j*lda] = temp13; - C[30 + j*lda] = temp14; - C[31 + j*lda] = temp15; - } - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start 
executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/al_matmul/dataset.h b/mt/al_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/al_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 
3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 
1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 
0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 
91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 
70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 
69, 73, 94, 89 -}; - diff --git a/mt/al_matmul/matmul_gendata.pl b/mt/al_matmul/matmul_gendata.pl deleted file mode 100755 index f21bb46..0000000 --- a/mt/al_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < 
$numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/al_matmul/matmul_mi.c b/mt/al_matmul/matmul_mi.c deleted file mode 100644 index c8016db..0000000 --- a/mt/al_matmul/matmul_mi.c +++ /dev/null @@ -1,327 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for 
( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - // feel free to make a separate function for MI and MSI versions. 
- int i, j, k, x; - data_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; - data_t temp8, temp9, temp10, temp11, temp12, temp13, temp14, temp15; - - - if(coreid == 0) { - for(j = 0; j < 32; j++) { - temp0 = C[j*lda]; - temp1 = C[1 + j*lda]; - temp2 = C[2 + j*lda]; - temp3 = C[3 + j*lda]; - temp4 = C[4 + j*lda]; - temp5 = C[5 + j*lda]; - temp6 = C[6 + j*lda]; - temp7 = C[7 + j*lda]; - temp8 = C[8 + j*lda]; - temp9 = C[9 + j*lda]; - temp10 = C[10 + j*lda]; - temp11 = C[11 + j*lda]; - temp12 = C[12 + j*lda]; - temp13 = C[13 + j*lda]; - temp14 = C[14 + j*lda]; - temp15 = C[15 + j*lda]; - for(k = 0; k < 32; k++) { - temp0 += A[j*lda + k] * B[k*lda]; - temp1 += A[j*lda + k] * B[1 + k*lda]; - temp2 += A[j*lda + k] * B[2 + k*lda]; - temp3 += A[j*lda + k] * B[3 + k*lda]; - temp4 += A[j*lda + k] * B[4 + k*lda]; - temp5 += A[j*lda + k] * B[5 + k*lda]; - temp6 += A[j*lda + k] * B[6 + k*lda]; - temp7 += A[j*lda + k] * B[7 + k*lda]; - temp8 += A[j*lda + k] * B[8 + k*lda]; - temp9 += A[j*lda + k] * B[9 + k*lda]; - temp10 += A[j*lda + k] * B[10 + k*lda]; - temp11 += A[j*lda + k] * B[11 + k*lda]; - temp12 += A[j*lda + k] * B[12 + k*lda]; - temp13 += A[j*lda + k] * B[13 + k*lda]; - temp14 += A[j*lda + k] * B[14 + k*lda]; - temp15 += A[j*lda + k] * B[15 + k*lda]; - } - C[j*lda] = temp0; - C[1 + j*lda] = temp1; - C[2 + j*lda] = temp2; - C[3 + j*lda] = temp3; - C[4 + j*lda] = temp4; - C[5 + j*lda] = temp5; - C[6 + j*lda] = temp6; - C[7 + j*lda] = temp7; - C[8 + j*lda] = temp8; - C[9 + j*lda] = temp9; - C[10 + j*lda] = temp10; - C[11 + j*lda] = temp11; - C[12 + j*lda] = temp12; - C[13 + j*lda] = temp13; - C[14 + j*lda] = temp14; - C[15 + j*lda] = temp15; - } - } - - else { - for(j = 16; j < 32; j++) { - temp0 = C[16 + j*lda]; - temp1 = C[17 + j*lda]; - temp2 = C[18 + j*lda]; - temp3 = C[19 + j*lda]; - temp4 = C[20 + j*lda]; - temp5 = C[21 + j*lda]; - temp6 = C[22 + j*lda]; - temp7 = C[23 + j*lda]; - temp8 = C[24 + j*lda]; - temp9 = C[25 + j*lda]; - temp10 = C[26 + j*lda]; - 
temp11 = C[27 + j*lda]; - temp12 = C[28 + j*lda]; - temp13 = C[29 + j*lda]; - temp14 = C[30 + j*lda]; - temp15 = C[31 + j*lda]; - for(k = 0; k < 32; k++) { - temp0 += A[j*lda + k] * B[16 + k*lda]; - temp1 += A[j*lda + k] * B[17 + k*lda]; - temp2 += A[j*lda + k] * B[18 + k*lda]; - temp3 += A[j*lda + k] * B[19 + k*lda]; - temp4 += A[j*lda + k] * B[20 + k*lda]; - temp5 += A[j*lda + k] * B[21 + k*lda]; - temp6 += A[j*lda + k] * B[22 + k*lda]; - temp7 += A[j*lda + k] * B[23 + k*lda]; - temp8 += A[j*lda + k] * B[24 + k*lda]; - temp9 += A[j*lda + k] * B[25 + k*lda]; - temp10 += A[j*lda + k] * B[26 + k*lda]; - temp11 += A[j*lda + k] * B[27 + k*lda]; - temp12 += A[j*lda + k] * B[28 + k*lda]; - temp13 += A[j*lda + k] * B[29 + k*lda]; - temp14 += A[j*lda + k] * B[30 + k*lda]; - temp15 += A[j*lda + k] * B[31 + k*lda]; - } - C[16 + j*lda] = temp0; - C[17 + j*lda] = temp1; - C[18 + j*lda] = temp2; - C[19 + j*lda] = temp3; - C[20 + j*lda] = temp4; - C[21 + j*lda] = temp5; - C[22 + j*lda] = temp6; - C[23 + j*lda] = temp7; - C[24 + j*lda] = temp8; - C[25 + j*lda] = temp9; - C[26 + j*lda] = temp10; - C[27 + j*lda] = temp11; - C[28 + j*lda] = temp12; - C[29 + j*lda] = temp13; - C[30 + j*lda] = temp14; - C[31 + j*lda] = temp15; - } - for(j = 0; j <16; j++) { - temp0 = C[16 + j*lda]; - temp1 = C[17 + j*lda]; - temp2 = C[18 + j*lda]; - temp3 = C[19 + j*lda]; - temp4 = C[20 + j*lda]; - temp5 = C[21 + j*lda]; - temp6 = C[22 + j*lda]; - temp7 = C[23 + j*lda]; - temp8 = C[24 + j*lda]; - temp9 = C[25 + j*lda]; - temp10 = C[26 + j*lda]; - temp11 = C[27 + j*lda]; - temp12 = C[28 + j*lda]; - temp13 = C[29 + j*lda]; - temp14 = C[30 + j*lda]; - temp15 = C[31 + j*lda]; - for(k = 0; k < 32; k++) { - temp0 += A[j*lda + k] * B[16 + k*lda]; - temp1 += A[j*lda + k] * B[17 + k*lda]; - temp2 += A[j*lda + k] * B[18 + k*lda]; - temp3 += A[j*lda + k] * B[19 + k*lda]; - temp4 += A[j*lda + k] * B[20 + k*lda]; - temp5 += A[j*lda + k] * B[21 + k*lda]; - temp6 += A[j*lda + k] * B[22 + k*lda]; - temp7 += A[j*lda 
+ k] * B[23 + k*lda]; - temp8 += A[j*lda + k] * B[24 + k*lda]; - temp9 += A[j*lda + k] * B[25 + k*lda]; - temp10 += A[j*lda + k] * B[26 + k*lda]; - temp11 += A[j*lda + k] * B[27 + k*lda]; - temp12 += A[j*lda + k] * B[28 + k*lda]; - temp13 += A[j*lda + k] * B[29 + k*lda]; - temp14 += A[j*lda + k] * B[30 + k*lda]; - temp15 += A[j*lda + k] * B[31 + k*lda]; - } - C[16 + j*lda] = temp0; - C[17 + j*lda] = temp1; - C[18 + j*lda] = temp2; - C[19 + j*lda] = temp3; - C[20 + j*lda] = temp4; - C[21 + j*lda] = temp5; - C[22 + j*lda] = temp6; - C[23 + j*lda] = temp7; - C[24 + j*lda] = temp8; - C[25 + j*lda] = temp9; - C[26 + j*lda] = temp10; - C[27 + j*lda] = temp11; - C[28 + j*lda] = temp12; - C[29 + j*lda] = temp13; - C[30 + j*lda] = temp14; - C[31 + j*lda] = temp15; - } - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/al_vvadd/al_vvadd.c b/mt/al_vvadd/al_vvadd.c deleted file mode 100755 index fd89916..0000000 --- a/mt/al_vvadd/al_vvadd.c +++ /dev/null @@ -1,173 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // 
***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - size_t start, end, i; - start = (coreid == 0) ? 0 : n/2; - end = (coreid == 0) ? n/2 : n; - - for (i = start; i < end; i++) - { - x[i] = x[i] + y[i]; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/al_vvadd/dataset.h b/mt/al_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/al_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 
16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 
3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 
14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 
0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, 
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 
16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 
15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 
16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 
27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 
27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 
32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/al_vvadd/vvadd_gendata.pl b/mt/al_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/al_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - 
print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." 
\n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/am_matmul.c b/mt/am_matmul.c new file mode 100755 index 0000000..a5622fe --- /dev/null +++ b/mt/am_matmul.c @@ -0,0 +1,64 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + size_t i, j, k, l; + int row,row2, column, column2, column3, column4, column5, column6, column7, column8; + size_t max_dim = 32*32; + data_t element, element2, element3, element4, element5, element6, element7, element8; + data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){ + for (l=coreid*32/ncores; l<32*(1+coreid)/ncores; l+=2){ + row=l*32; + row2=(l+1)*32; + for (i=0; i -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( 
char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - size_t i, j, k, l; - int row,row2, column, column2, column3, column4, column5, column6, column7, column8; - size_t max_dim = 32*32; - data_t element, element2, element3, element4, element5, element6, element7, element8; - data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){ - for (l=coreid*32/ncores; l<32*(1+coreid)/ncores; l+=2){ - row=l*32; - row2=(l+1)*32; - for (i=0; i -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread 
unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - size_t i; - size_t j; - size_t k; - size_t max_dim = 32*32; - data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - //for (i=coreid*max_dim/ncores; 
i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){ - for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores)/2; i+=8){ - data_t element=A[i]; - data_t element2 = A[i+1]; - data_t element3 = A[i+2]; - data_t element4 = A[i+3]; - data_t element5 = A[i+4]; - data_t element6 = A[i+5]; - data_t element7 = A[i+6]; - data_t element8 = A[i+7]; - data_t elementA2 = A[i+32*8]; - data_t elementA21 = A[i+32*8+1]; - data_t elementA22 = A[i+32*8+2]; - data_t elementA23 = A[i+32*8+3]; - data_t elementA24 = A[i+32*8+4]; - data_t elementA25 = A[i+32*8+5]; - data_t elementA26 = A[i+32*8+6]; - data_t elementA27 = A[i+32*8+7]; - int row= (int)(i/32)*32; - int row2 = row+8*32; - int column = i%32*32; - int column2 = (i+1)%32*32; - int column3 = (i+2)%32*32; - int column4 = (i+3)%32*32; - int column5 = (i+4)%32*32; - int column6 = (i+5)%32*32; - int column7 = (i+6)%32*32; - int column8 = (i+7)%32*32; - - for (j=0; j<32; j++){ - temp_mat[j]+=element*B[column+j]+element2*B[column2+j]+element3*B[column3+j]+element4*B[column4+j]+element5*B[column5+j]+element6*B[column6+j]+element7*B[column7+j]+element8*B[column8+j]; - - temp_mat2[j]+=elementA2*B[column+j]+elementA21*B[column2+j]+elementA22*B[column3+j]+elementA23*B[column4+j]+elementA24*B[column5+j]+elementA25*B[column6+j]+elementA26*B[column7+j]+elementA27*B[column8+j]; - } - if (i%32==24){ - for(k=0; k<32; k++){ - C[row+k]=temp_mat[k]; - C[row2+k]=temp_mat2[k]; - temp_mat[k]=0; - temp_mat2[k]=0; - - } - } - } - - - - - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - - // Execute the provided, naive matmul - barrier(nc); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - - // clear results from the first trial - size_t i; - if (coreid == 0) - for (i=0; i < ARRAY_SIZE; i++) - results_data[i] = 0; - barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/am_matmul/matmul4.c b/mt/am_matmul/matmul4.c deleted file mode 100755 index 7953d59..0000000 --- a/mt/am_matmul/matmul4.c +++ /dev/null @@ -1,282 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda 
+ i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - /*size_t i; - size_t j; - size_t k; - size_t max_dim = 32*32; - data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){ - data_t element=A[i]; - data_t element2 = A[i+1]; - data_t element3 = A[i+2]; - data_t element4 = A[i+3]; - data_t element5 = A[i+4]; - data_t element6 = A[i+5]; - data_t element7 = A[i+6]; - data_t element8 = A[i+7]; - int row= (int)(i/32)*32; - int column = i%32*32; - int column2 = (i+1)%32*32; - int column3 = (i+2)%32*32; - int column4 = (i+3)%32*32; - int column5 = (i+4)%32*32; - int column6 = (i+5)%32*32; - int column7 = (i+6)%32*32; - int column8 = (i+7)%32*32; - - for (j=0; j<32; j++){ - temp_mat[j]+=element*B[column+j]+element2*B[column2+j]+element3*B[column3+j]+element4*B[column4+j]+element5*B[column5+j]+element6*B[column6+j]+element7*B[column7+j]+element8*B[column8+j]; - } - if (i%32==24){ - for(k=0; k<32; k++){ - C[row+k]=temp_mat[k]; - temp_mat[k]=0; - } - } - }*/ - int i,j,k,l; - //data_t element11, element12, element13, element14, element21, element22, element23, element24; - data_t element1, element2, element3, element4, element5, element6, element7, element8; - int row, row2; - //int column11, column12, column13, column14, column21, column22, column23, column24; - int column1, column2, column3, column4, column5, column6, column7, column8; - data_t temp[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - //data_t temp2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - if (coreid == 0){ - for (i=0; i<32; i++){ - if (i==15){ - for (j=0; j<32; j+=4){ - row=15*32; - element1 = A[row+j]; - element2 = A[row+j+1]; - element3 = A[row+j+2]; - element4 = A[row+j+3]; - column1 = j*32; - column2 = (j+1)*32; - column3 = (j+2)*32; - column4 = (j+3)*32; - for 
(k=0;k<32; k++){ - temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k]; - } - if (j==28){ - for (l=0; l<32; l++){ - C[row+l]=temp[l]; - temp[l]=0; - } - } - } - } - else{ - row = i*32; - for (j=0; j<16; j+=4){ - element1 = A[i*32+j]; - element2 = A[i*32+j+1]; - element3 = A[i*32+j+2]; - element4 = A[i*32+j+3]; - column1 = j*32; - column2 = (j+1)*32; - column3 = (j+2)*32; - column4 = (j+3)*32; - for (k=0; k<32; k++){ - temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k]; - } - if (j==12){ - for (l=0; l<32; l++){ - C[row+l]+=temp[l]; - temp[l]=0; - } - } - } - } - } - } - else if (coreid==1){ - for (i=0; i<32; i++){ - row = (31-i)*32; - if (row/32 != 15){ - for (j=16; j<32; j+=4){ - element1 = A[(31-i)*32+j]; - element2 = A[(31-i)*32+j+1]; - element3 = A[(31-i)*32+j+2]; - element4 = A[(31-i)*32+j+3]; - column1 = j*32; - column2 = (j+1)*32; - column3 = (j+2)*32; - column4 = (j+3)*32; - for (k=0; k<32; k++){ - temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k]; - } - if (j==28){ - for (l=0; l<32; l++){ - C[row+l]+=temp[l]; - temp[l]=0; - } - } - } - } - } - } - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - - // Execute the provided, naive matmul - barrier(nc); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - - // clear results from the first trial - size_t i; - if (coreid == 0) - for (i=0; i < ARRAY_SIZE; i++) - results_data[i] = 0; - barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/am_matmul/matmul_gendata.pl b/mt/am_matmul/matmul_gendata.pl deleted file mode 100755 index f21bb46..0000000 --- a/mt/am_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. 
-# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - 
-#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. ($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." 
\n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/am_matmul/matmul_mi.c b/mt/am_matmul/matmul_mi.c deleted file mode 100755 index 0a93ce2..0000000 --- a/mt/am_matmul/matmul_mi.c +++ /dev/null @@ -1,249 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if 
(coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i,j,k,l; - data_t element1, element2, element3, element4, element5, element6, element7, element8; - int row, row2; - int column1, column2, column3, column4, column5, column6, column7, column8; - data_t temp[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - data_t temp2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - if (coreid == 0){ - for (i=0; i -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += 
rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArray( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - /*size_t i, j, k; - int row, column, column2, column3, column4, column5, column6, column7, column8; - size_t max_dim = 32*32; - data_t element, element2, element3, element4, element5, element6, element7, element8; - data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){ - element=A[i]; - element2 = A[i+1]; - element3 = A[i+2]; - element4 = A[i+3]; - element5 = A[i+4]; - element6 = A[i+5]; - element7 = A[i+6]; - element8 
= A[i+7]; - row= (int)(i/32)*32; - column = i%32*32; - column2 = (i+1)%32*32; - column3 = (i+2)%32*32; - column4 = (i+3)%32*32; - column5 = (i+4)%32*32; - column6 = (i+5)%32*32; - column7 = (i+6)%32*32; - column8 = (i+7)%32*32; - - for (j=0; j<32; j+=8){ - temp_mat[j]+=element*B[column+j]+element2*B[column2+j]+element3*B[column3+j]+element4*B[column4+j]+element5*B[column5+j]+element6*B[column6+j]+element7*B[column7+j]+element8*B[column8+j]; - temp_mat[j+1]+=element*B[column+j+1]+element2*B[column2+j+1]+element3*B[column3+j+1]+element4*B[column4+j+1]+element5*B[column5+j+1]+element6*B[column6+j+1]+element7*B[column7+j+1]+element8*B[column8+j+1]; - temp_mat[j+2]+=element*B[column+j+2]+element2*B[column2+j+2]+element3*B[column3+j+2]+element4*B[column4+j+2]+element5*B[column5+j+2]+element6*B[column6+j+2]+element7*B[column7+j+2]+element8*B[column8+j+2]; - temp_mat[j+3]+=element*B[column+j+3]+element2*B[column2+j+3]+element3*B[column3+j+3]+element4*B[column4+j+3]+element5*B[column5+j+3]+element6*B[column6+j+3]+element7*B[column7+j+3]+element8*B[column8+j+3]; - temp_mat[j+4]+=element*B[column+j+4]+element2*B[column2+j+4]+element3*B[column3+j+4]+element4*B[column4+j+4]+element5*B[column5+j+4]+element6*B[column6+j+4]+element7*B[column7+j+4]+element8*B[column8+j+4]; - temp_mat[j+5]+=element*B[column+j+5]+element2*B[column2+j+5]+element3*B[column3+j+5]+element4*B[column4+j+5]+element5*B[column5+j+5]+element6*B[column6+j+5]+element7*B[column7+j+5]+element8*B[column8+j+5]; - temp_mat[j+6]+=element*B[column+j+6]+element2*B[column2+j+6]+element3*B[column3+j+6]+element4*B[column4+j+6]+element5*B[column5+j+6]+element6*B[column6+j+6]+element7*B[column7+j+6]+element8*B[column8+j+6]; - temp_mat[j+7]+=element*B[column+j+7]+element2*B[column2+j+7]+element3*B[column3+j+7]+element4*B[column4+j+7]+element5*B[column5+j+7]+element6*B[column6+j+7]+element7*B[column7+j+7]+element8*B[column8+j+7]; - } - if (i%32==24){ - for(k=0; k<32; k++){ - C[row+k]=temp_mat[k]; - temp_mat[k]=0; - } - } - }*/ 
- int i,j,k,l; - data_t element1, element2, element3, element4, element5, element6, element7, element8; - int row, row2; - int column1, column2, column3, column4, column5, column6, column7, column8; - data_t temp[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - data_t temp2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - if (coreid == 0){ - for (i=0; i<32; i+=2){ - row = i*32; - row2 = (i+1)*32; - for (j=0; j<16; j+=4){ - element1 = A[row+j]; - element2 = A[row+j+1]; - element3 = A[row+j+2]; - element4 = A[row+j+3]; - column1 = j*32; - column2 = (j+1)*32; - column3 = (j+2)*32; - column4 = (j+3)*32; - element5 = A[row2+j]; - element6 = A[row2+j+1]; - element7 = A[row2+j+2]; - element8 = A[row2+j+3]; - - for (k=0; k<32; k+=4){ - temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k]; - temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1]; - temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2]; - temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3]; - temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k]; - temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1]; - temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2]; - temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3]; - } - if (j==12){ - for (l=0; l<32; l++){ - C[row+l]+=temp[l]; - C[row2+l]+=temp2[l]; - temp[l]=0; - temp2[l]=0; - } - } - } - } - } - else if (coreid==1){ - for (i=0; i<32; i+=2){ - row = (31-i)*32; - row2 = (31-i-1)*32; - for (j=16; j<32; j+=4){ - element1 = A[row+j]; - element2 = A[row+j+1]; - element3 = A[row+j+2]; - element4 = A[row+j+3]; - 
element5 = A[row2+j]; - element6 = A[row2+j+1]; - element7 = A[row2+j+2]; - element8 = A[row2+j+3]; - column1 = j*32; - column2 = (j+1)*32; - column3 = (j+2)*32; - column4 = (j+3)*32; - for (k=0; k<32; k+=4){ - temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k]; - temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1]; - temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2]; - temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3]; - temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k]; - temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1]; - temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2]; - temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3]; - } - if (j==28){ - for (l=0; l<32; l++){ - C[row+l]+=temp[l]; - C[row2+l]+=temp2[l]; - temp[l]=0; - temp2[l]=0; - } - } - } - } - } - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - - // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); - - - // verify - verify(ARRAY_SIZE, results_data, verify_data); - - // clear results from the first trial - size_t i; - if (coreid == 0) - for (i=0; i < ARRAY_SIZE; i++) - results_data[i] = 0; - barrier(); - - - // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); - -#ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); - - exit(0); -} - diff --git a/mt/am_matmul/matmul_msi.c b/mt/am_matmul/matmul_msi.c deleted file mode 100755 index b4e5ad8..0000000 --- a/mt/am_matmul/matmul_msi.c +++ /dev/null @@ -1,216 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda 
+ i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - size_t i, j, k, l; - int row,row2, column, column2, column3, column4, column5, column6, column7, column8; - size_t max_dim = 32*32; - data_t element, element2, element3, element4, element5, element6, element7, element8; - data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){ - for (l=coreid*32/ncores; l<32*(1+coreid)/ncores; l+=2){ - row=l*32; - row2=(l+1)*32; - for (i=0; i -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArray( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if 
(test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - size_t i, j, k, l; - int row,row2, column, column2, column3, column4, column5, column6, column7, column8; - size_t max_dim = 32*32; - data_t element, element2, element3, element4, element5, element6, element7, element8; - data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){ - for (l=coreid*32/ncores; l<32*(1+coreid)/ncores; l+=2){ - row=l*32; - row2=(l+1)*32; - for (i=0; i -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, 
_c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - size_t i; - for (i= coreid*n/ncores; i<(n/ncores+coreid*n/ncores); i++){ - x[i] = x[i] + y[i]; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/am_vvadd/dataset.h b/mt/am_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/am_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 
1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 
17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 
11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static 
data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 
12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 
2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 
13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 
9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 
15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 
20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/am_vvadd/vvadd_gendata.pl b/mt/am_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/am_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# 
Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - 
print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/an_matmul.c b/mt/an_matmul.c new file mode 100755 index 0000000..eb76ffa --- /dev/null +++ b/mt/an_matmul.c @@ -0,0 +1,40 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + // ***************************** // + // **** ADD YOUR CODE HERE ***** // + int i, j, k, limit, end, kblock, iblock, r, jblock; + int tempA1; + int tempB1; + + limit = lda / ncores; + j = (coreid)*limit; + end = (coreid+1)*limit; + + kblock = 1; + iblock = 1; + jblock = 1; + for (; j < end; j+= jblock) + for ( k = 0; k < lda; k = k + kblock ) + { + r = j*lda + k; + tempA1 = A[r]; + + for ( i = 0; i < lda; i = i + iblock ) { + tempB1 = k*lda + i; + + C[i + j*lda] += tempA1*B[tempB1]; + + } + barrier(ncores); + } + // ***************************** // + // + // feel free to make a separate function for MI and MSI versions. 
+ +} diff --git a/mt/an_matmul/an_matmul.c b/mt/an_matmul/an_matmul.c deleted file mode 100755 index 2150277..0000000 --- a/mt/an_matmul/an_matmul.c +++ /dev/null @@ -1,196 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* 
test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( j = 0; j < lda; j++ ) - for ( k = 0; k < lda; k++ ) - { - for ( i = 0; i < lda; i++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - int i, j, k, limit, end, kblock, iblock, r, jblock; - int tempA1; - int tempB1; - - limit = lda / 2; - if (coreid == 0){ - j = 0; - end = limit; - } else { - j = limit; - end = lda; - } - - kblock = 1; - iblock = 1; - jblock = 1; - for (; j < end; j+= jblock) - for ( k = 0; k < lda; k = k + kblock ) - { - r = j*lda + k; - tempA1 = A[r]; - - for ( i = 0; i < lda; i = i + iblock ) { - tempB1 = k*lda + i; - - C[i + j*lda] += tempA1*B[tempB1]; - - } - } - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/an_matmul/dataset.h b/mt/an_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/an_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 
3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 
0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 
0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 
68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 
61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/an_matmul/matmul_gendata.pl b/mt/an_matmul/matmul_gendata.pl deleted file mode 100755 index 
f21bb46..0000000 --- a/mt/an_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print 
sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/an_matmul/matmul_mi.c b/mt/an_matmul/matmul_mi.c deleted file mode 100644 index 2150277..0000000 --- a/mt/an_matmul/matmul_mi.c +++ /dev/null @@ -1,196 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for 
( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( j = 0; j < lda; j++ ) - for ( k = 0; k < lda; k++ ) - { - for ( i = 0; i < lda; i++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - int i, j, k, limit, end, kblock, iblock, r, jblock; - int tempA1; - int tempB1; - - limit = lda / 2; - if (coreid == 0){ - j = 0; - end = limit; - } else { - j = limit; - end = lda; - } - - kblock = 1; - iblock = 1; - jblock = 1; - for (; j < end; j+= jblock) - for ( k = 0; k < lda; k = k + kblock ) - { - r = j*lda + k; - tempA1 = A[r]; - - for ( i = 0; i < lda; i = i + iblock ) { - tempB1 = k*lda + i; - - C[i + j*lda] += tempA1*B[tempB1]; - - } - } - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/an_vvadd/an_vvadd.c b/mt/an_vvadd/an_vvadd.c deleted file mode 100755 index d5868e4..0000000 --- a/mt/an_vvadd/an_vvadd.c +++ /dev/null @@ -1,165 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // 
***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/an_vvadd/dataset.h b/mt/an_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/an_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 
9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 
2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 
6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 
12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 
16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 
1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 
18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 
16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 
21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 
14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff 
--git a/mt/an_vvadd/vvadd_gendata.pl b/mt/an_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/an_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - 
for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/ap_matmul.c b/mt/ap_matmul.c new file mode 100755 index 0000000..ef376cb --- /dev/null +++ b/mt/ap_matmul.c @@ -0,0 +1,29 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + // ***************************** // + // **** ADD YOUR CODE HERE ***** // + // ***************************** // + // + // feel free to make a separate function for MI and MSI versions. 
+ int i, j, k, ii, jj, kk; + int block = lda / ncores; + int leftover = lda % ncores; + int start = block * coreid; + + + + for ( j = start; j < (start+block); j++ ) + for ( k = 0; k < lda; k++ ) + { + for ( i = 0; i < lda; i++ ) + { + C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; + } + } +} diff --git a/mt/ap_matmul/ap_matmul.c b/mt/ap_matmul/ap_matmul.c deleted file mode 100755 index 1b31d86..0000000 --- a/mt/ap_matmul/ap_matmul.c +++ /dev/null @@ -1,238 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: ME STEPHANIE TUNG -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda 
+ i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - int i, j, k, ii, jj, kk; - int block = lda / ncores; - int leftover = lda % ncores; - int start = block * coreid; - - - - for ( j = start; j < (start+block); j++ ) - for ( k = 0; k < lda; k++ ) - { - for ( i = 0; i < lda; i++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } -/* - - - for ( j = coreid; j < lda; j += ncores ) - for ( k = 0; k < lda; k++ ) - { - for ( i = 0; i < lda; i++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -/* - if (coreid > 0) { - return; - } - - for ( j = (lda - leftover); j < lda; j++ ) - for ( i = 0; i < lda; i++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - - - -if (coreid > 0) { - return; -} - - - -for (jj = start; jj < start+block; jj += 4) { - for (kk = 0; kk < lda; kk += 4) { - for (ii = 0; ii < lda; ii += 4) { - for (i = ii; i < ii+4; i += 4) { - //float * p = B + i; - for (j = jj; j < jj+4; j++) { - for (k = kk; k < kk+4; k++) { - - float a = A[k + j*lda]; - - C[i + j*lda] += a * B[k*lda + i]; - C[i + j*lda + 1] += a * B[k*lda + i + 1]; - C[i + j*lda + 2] += a * B[k*lda + i + 2]; - C[i + j*lda + 3] += a * B[k*lda + i + 3]; - } - } - } - } - } -} - -*/ - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ap_matmul/dataset.h b/mt/ap_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/ap_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 
3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 
0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 
0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 
68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 
61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/ap_matmul/matmul_gendata.pl b/mt/ap_matmul/matmul_gendata.pl deleted file mode 100755 index 
f21bb46..0000000 --- a/mt/ap_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print 
sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/ap_matmul/matmul_mi.c b/mt/ap_matmul/matmul_mi.c deleted file mode 100755 index 1b31d86..0000000 --- a/mt/ap_matmul/matmul_mi.c +++ /dev/null @@ -1,238 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: ME STEPHANIE TUNG -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s 
:", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. 
- int i, j, k, ii, jj, kk; - int block = lda / ncores; - int leftover = lda % ncores; - int start = block * coreid; - - - - for ( j = start; j < (start+block); j++ ) - for ( k = 0; k < lda; k++ ) - { - for ( i = 0; i < lda; i++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } -/* - - - for ( j = coreid; j < lda; j += ncores ) - for ( k = 0; k < lda; k++ ) - { - for ( i = 0; i < lda; i++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -/* - if (coreid > 0) { - return; - } - - for ( j = (lda - leftover); j < lda; j++ ) - for ( i = 0; i < lda; i++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - - - -if (coreid > 0) { - return; -} - - - -for (jj = start; jj < start+block; jj += 4) { - for (kk = 0; kk < lda; kk += 4) { - for (ii = 0; ii < lda; ii += 4) { - for (i = ii; i < ii+4; i += 4) { - //float * p = B + i; - for (j = jj; j < jj+4; j++) { - for (k = kk; k < kk+4; k++) { - - float a = A[k + j*lda]; - - C[i + j*lda] += a * B[k*lda + i]; - C[i + j*lda + 1] += a * B[k*lda + i + 1]; - C[i + j*lda + 2] += a * B[k*lda + i + 2]; - C[i + j*lda + 3] += a * B[k*lda + i + 3]; - } - } - } - } - } -} - -*/ - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ap_vvadd/.vvadd.c.swp b/mt/ap_vvadd/.vvadd.c.swp deleted file mode 100644 index f7e8ee9..0000000 Binary files a/mt/ap_vvadd/.vvadd.c.swp and /dev/null differ diff --git a/mt/ap_vvadd/ap_vvadd.c b/mt/ap_vvadd/ap_vvadd.c deleted file mode 100755 index aa3b3ad..0000000 --- a/mt/ap_vvadd/ap_vvadd.c +++ /dev/null @@ -1,182 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // 
***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - - size_t i, j; - - size_t blocksize = n / ncores; - size_t start = coreid * blocksize; - size_t leftover = n % ncores; - -// int i, j; - - for (i = start; i < (start + blocksize); i++) { - x[i] = x[i] + y[i]; - } - - for (j = (n - leftover) + coreid; j < n; j += ncores) { - x[j] = x[j] + y[j]; - } - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ap_vvadd/dataset.h b/mt/ap_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/ap_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define 
DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 
9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 
5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 
1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 
4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 
4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 
18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 
13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 
7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 
24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 
29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/ap_vvadd/vvadd_gendata.pl b/mt/ap_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/ap_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - 
if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." 
\n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/aq_matmul.c b/mt/aq_matmul.c new file mode 100644 index 0000000..f2fb3e0 --- /dev/null +++ b/mt/aq_matmul.c @@ -0,0 +1,30 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + // ***************************** // + // **** ADD YOUR CODE HERE ***** // + // ***************************** // + // + // feel free to make a separate function for MI and MSI versions. + + for (int i = coreid; i < lda; i+=ncores*2) + { + for (int j = 0; j < lda; j++) + { + for (int k = 0; k < lda; k++) + { + int A12 = A[j*lda + k]; + int B1 = B[k*lda + i]; + int B2 = B[k*lda + i + ncores]; + C[i+j*lda] += A12 * B1; + C[i+ncores+j*lda] += A12 * B2; + //C[i+j*lda] += A[j*lda +k] * B[k*lda +i]; + } + } + } +} diff --git a/mt/aq_matmul/aq_matmul.c b/mt/aq_matmul/aq_matmul.c deleted file mode 100644 index a203766..0000000 --- a/mt/aq_matmul/aq_matmul.c +++ /dev/null @@ -1,183 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda 
+ i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - - for (int i = coreid; i < lda; i+=ncores*2) - { - for (int j = 0; j < lda; j++) - { - for (int k = 0; k < lda; k++) - { - int A12 = A[j*lda + k]; - int B1 = B[k*lda + i]; - int B2 = B[k*lda + i + ncores]; - C[i+j*lda] += A12 * B1; - C[i+ncores+j*lda] += A12 * B2; - //C[i+j*lda] += A[j*lda +k] * B[k*lda +i]; - } - } - } -} - - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/aq_matmul/dataset.h b/mt/aq_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/aq_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - 
-#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 
3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 
3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 
3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 
47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 
56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/aq_matmul/matmul_gendata.pl b/mt/aq_matmul/matmul_gendata.pl deleted file mode 100755 index f21bb46..0000000 --- a/mt/aq_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. 
-# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - 
-#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. ($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." 
\n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/aq_matmul/matmul_mi.c b/mt/aq_matmul/matmul_mi.c deleted file mode 100755 index 932e6bc..0000000 --- a/mt/aq_matmul/matmul_mi.c +++ /dev/null @@ -1,183 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if 
(coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - - for (int i = coreid; i < lda; i+=ncores) - { - for (int j = 0; j < lda; j++) - { - for (int k = 0; k < lda; k++) - { - //int A12 = A[j*lda + k]; - //int B1 = B[k*lda + i]; - //int B2 = B[k*lda + i + ncores]; - //C[i+j*lda] += A12 * B1; - //C[i+ncores+j*lda] += A12 * B2; - C[i+j*lda] += A[j*lda +k] * B[k*lda +i]; - } - } - } -} - - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/aq_vvadd/aq_vvadd.c b/mt/aq_vvadd/aq_vvadd.c deleted file mode 100755 index 375569b..0000000 --- a/mt/aq_vvadd/aq_vvadd.c +++ /dev/null @@ -1,191 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // 
***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - - size_t i; - - for (i = coreid; i < n; i+=ncores*2) - { - //int x1 = x[i]; - //int x2 = x[i+ncores]; - //int x3 = x[i+ncores*2]; - //int x4 = x[i+ncores*4]; - //int y1 = y[i]; - //int y2 = y[i+ncores]; - //int y3 = y[i+ncores*2]; - //int y4 = y[i+ncores*4]; - int x1 = x[i]; - int x2 = x[i+ncores]; - int y1 = y[i]; - int y2 = y[i+ncores]; - x[i] = x1 + y1; - x[i+ncores] = x2 + y2; - //x[i+ncores*2] = x[i+ncores*2] + y[i+ncores*2]; - // x[i+ncores*4] = x[i+ncores*4] + y[i+ncores*4]; - //x[i] = x1 + y1; - //x[i+ncores] = x2 + y2; - //x[i+ncores*2] = x3 + y3; - //x[i+ncores*4] = x4 + y4; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, 
verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/aq_vvadd/dataset.h b/mt/aq_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/aq_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 
17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 
7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 
7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 
4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 
17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 
6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 
24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 
3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 
13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 
10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/aq_vvadd/vvadd_gendata.pl b/mt/aq_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/aq_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. 
-# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - 
-#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/ar_matmul.c b/mt/ar_matmul.c new file mode 100755 index 0000000..2299906 --- /dev/null +++ b/mt/ar_matmul.c @@ -0,0 +1,41 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + // ***************************** // + // **** ADD YOUR CODE HERE ***** // + // ***************************** // + // + // feel free to make a separate function for MI and MSI versions. 
+ + int i, j, k, B_t[32*32], x, y; + int ALoc, BLoc, CLoc; +// int ii = 0, done = 0; + //for(x = coreid*(lda/ncores); x < (coreid+1)*(lda/ncores) && x < lda; x++) { + for (x = 0; x < lda; x++) { + for(y = 0; y < lda; y++) { + B_t[y*lda + x] = B[x*lda + y]; + } + } + // for ( ii = lda/4 ; ii < lda ; ii += lda/4) + //{ +// for ( i = coreid*(ii/ncores); i < (coreid+1)*(ii/ncores) && i < ii; i++ ) + for ( i = coreid*(lda/ncores); i < (coreid+1)*(lda/ncores) && i < lda; i++ ) + { + ALoc = i*lda; + for ( j = 0; j < lda; j++ ) + { + BLoc = j*lda; + CLoc = i*lda + j; + for ( k = 0; k < lda; k++ ) + { + C[CLoc] += A[ALoc + k] * B_t[BLoc + k]; + } + } + } + //} +} diff --git a/mt/ar_matmul/ar_matmul.c b/mt/ar_matmul/ar_matmul.c deleted file mode 100755 index 9286adc..0000000 --- a/mt/ar_matmul/ar_matmul.c +++ /dev/null @@ -1,193 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda 
+ i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - - int i, j, k, B_t[32*32], x, y; - int ALoc, BLoc, CLoc; -// int ii = 0, done = 0; - //for(x = coreid*(lda/ncores); x < (coreid+1)*(lda/ncores) && x < lda; x++) { - for (x = 0; x < lda; x++) { - for(y = 0; y < lda; y++) { - B_t[y*lda + x] = B[x*lda + y]; - } - } - // for ( ii = lda/4 ; ii < lda ; ii += lda/4) - //{ -// for ( i = coreid*(ii/ncores); i < (coreid+1)*(ii/ncores) && i < ii; i++ ) - for ( i = coreid*(lda/ncores); i < (coreid+1)*(lda/ncores) && i < lda; i++ ) - { - ALoc = i*lda; - for ( j = 0; j < lda; j++ ) - { - BLoc = j*lda; - CLoc = i*lda + j; - for ( k = 0; k < lda; k++ ) - { - C[CLoc] += A[ALoc + k] * B_t[BLoc + k]; - } - } - } - //} -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ar_matmul/dataset.h b/mt/ar_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/ar_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 
3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 
0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 
0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 
68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 
61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/ar_matmul/matmul_gendata.pl b/mt/ar_matmul/matmul_gendata.pl deleted file mode 100755 index 
f21bb46..0000000 --- a/mt/ar_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print 
sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/ar_vvadd/ar_vvadd.c b/mt/ar_vvadd/ar_vvadd.c deleted file mode 100755 index 18ad033..0000000 --- a/mt/ar_vvadd/ar_vvadd.c +++ /dev/null @@ -1,170 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", 
(long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - size_t i; - for(int i = coreid*(n/ncores); i < (coreid+1)*(n/ncores) && i < n; i++) - { - x[i] = x[i] + y[i]; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ar_vvadd/dataset.h b/mt/ar_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/ar_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 
1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 
17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 
11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static 
data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 
12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 
2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 
13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 
9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 
15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 
20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/ar_vvadd/vvadd_gendata.pl b/mt/ar_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/ar_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# 
Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - 
print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/as_matmul/as_matmul.c b/mt/as_matmul/as_matmul.c deleted file mode 100755 index 15855e0..0000000 --- a/mt/as_matmul/as_matmul.c +++ /dev/null @@ -1,281 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda 
+ i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - - int i, j, k, n, m; - - - //matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 957424 cycles, 29.2 cycles/iter, 3.6 CPI - //matmul(32, input1_data, input2_data, results_data); barrier(nc): 340408 cycles, 10.3 cycles/iter, 1.8 CPI - - for (n = 0; n < lda; n += 1) { - for (m = 0; m < lda; m += 1) { - bTranspose[lda*m + n] = B[lda*n + m]; - bTranspose[lda*n + m] = B[lda*m + n]; - } - } - barrier(ncores); - - for ( j = coreid; j < lda; j += 2*ncores ) { - for ( i = 0; i < lda; i += 1 ){ - c1 = 0; //global vars c1, c2 - c2 = 0; - for ( k = 0; k < lda; k += 1 ) { - c1 += A[j * lda + k] * bTranspose[i*lda + k]; - c2 += A[(j+2) * lda + k] * bTranspose[i*lda + k]; - - //barrier(nc); - } - - C[i + j * lda] = c1; - C[i + (j+2) * lda] = c2; - barrier(ncores); - } - //barrier(nc); - } - - - - - //matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 983609 cycles, 30.0 cycles/iter, 3.7 CPI - //matmul(32, input1_data, input2_data, results_data); barrier(nc): 389942 cycles, 11.9 cycles/iter, 2.5 CPI - - /* - for ( j = coreid; j < lda; j += 2*ncores ) { - for ( i = 0; i < lda; i += 1 ){ - c1 = 0; //global vars c1, c2 - c2 = 0; - for ( k = 0; k < lda; k += 1 ) { - c1 += A[j * lda + k] * B[k*lda + i]; - c2 += A[(j+2) * lda + k] * B[k*lda + i]; - - //barrier(nc); - } - - C[i + j * lda] = c1; - C[i + (j+2) * lda] = c2; - barrier(nc); - } - //barrier(nc); - } - */ - - // matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 973781 cycles, 29.7 cycles/iter, 3.7 CPI - // matmul(32, input1_data, input2_data, results_data); barrier(nc): 461066 cycles, 14.0 cycles/iter, 3.5 CPI - // for ( k = 0; k < lda; k += 1 ) { - // 
for ( j = coreid; j < lda; j += 2*ncores ) { - // for ( i = 0; i < lda; i += 1 ){ - // C[i + j * lda] += A[j * lda + k] * B[k*lda + i]; - // C[i + (j+2) * lda] += A[(j+2) * lda + k] * B[k*lda + i]; - // //barrier(nc); - // } - // barrier(nc); - // } - // //barrier(nc); - // } - - - // matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 965136 cycles, 29.4 cycles/iter, 3.7 CPI - // matmul(32, input1_data, input2_data, results_data); barrier(nc): 513779 cycles, 15.6 cycles/iter, 3.2 CPI - - // for ( j = coreid; j < lda; j += 2*ncores ) { - // for ( i = 0; i < lda; i += 1 ){ - // for ( k = 0; k < lda; k += 1 ) { - // C[i + j * lda] += A[j * lda + k] * B[k*lda + i]; - // C[i + (j+2) * lda] += A[(j+2) * lda + k] * B[k*lda + i]; - - // //barrier(nc); - // } - // barrier(nc); - // } - // //barrier(nc); - //} - - - // matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 937892 cycles, 28.6 cycles/iter, 3.6 CPI - // matmul(32, input1_data, input2_data, results_data); barrier(nc): 576478 cycles, 17.5 cycles/iter, 3.5 CPI - - // for ( i = 0; i < lda; i += 1 ){ - // for ( j = coreid; j < lda; j += 2*ncores ) { - // for ( k = 0; k < lda; k += 1 ) { - // C[i + j * lda] += A[j * lda + k] * B[k*lda + i]; - // C[i + (j+2) * lda] += A[(j+2) * lda + k] * B[k*lda + i]; - - // //barrier(nc); - // } - // barrier(nc); - // } - // //barrier(nc); - // } - - //for ( i = coreid; i < lda; i += ncores ){ - // for ( j = coreid; j < lda; j += ncores ) { - // for ( k = coreid; k < lda; k += ncores ) { - // C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - // } - //barrier(nc); - // } - //} -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/as_matmul/dataset.h b/mt/as_matmul/dataset.h deleted file mode 100755 index 75e80d6..0000000 --- a/mt/as_matmul/dataset.h +++ /dev/null @@ -1,180 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static int c1; -static int c2; -//static int n; -//static int m; -static data_t bTranspose[DIM_SIZE*DIM_SIZE]; - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, 
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 
1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 
2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 
69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 
64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 
89 -}; - diff --git a/mt/as_matmul/matmul_gendata.pl b/mt/as_matmul/matmul_gendata.pl deleted file mode 100755 index f21bb46..0000000 --- a/mt/as_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; 
$j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/as_matmul/matmul_mi.c b/mt/as_matmul/matmul_mi.c deleted file mode 100644 index 88534b8..0000000 --- a/mt/as_matmul/matmul_mi.c +++ /dev/null @@ -1,189 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for 
( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - - int i, j, k, n, m, c1, c2; - - //matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 952596 cycles, 29.0 cycles/iter, 3.6 CPI - //matmul(32, input1_data, input2_data, results_data); barrier(nc): 570135 cycles, 17.3 cycles/iter, 3.4 CPI - - for ( j = coreid; j < lda; j += 2*ncores ) { - for ( i = 0; i < lda; i += 1 ){ - c1 = 0; //global vars c1, c2 - c2 = 0; - for ( k = 0; k < lda; k += 1 ) { - c1 += A[j * lda + k] * B[k*lda + i]; - c2 += A[(j+2) * lda + k] * B[k*lda + i]; - - //barrier(nc); - } - - C[i + j * lda] = c1; - C[i + (j+2) * lda] = c2; - barrier(nc); - } - //barrier(nc); - } - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). 
Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} diff --git a/mt/as_vvadd/as_vvadd.c b/mt/as_vvadd/as_vvadd.c deleted file mode 100755 index 3034ae3..0000000 --- a/mt/as_vvadd/as_vvadd.c +++ /dev/null @@ -1,174 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // 
***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - size_t i; - - for (i = coreid; i < n; i += 2*ncores) { - x[i] = x[i] + y[i]; - x[i+2] = x[i+2] + y[i+2]; - //barrier(nc); - } - barrier(ncores); //adding a barrier so there aren't any OOB errors due to faster threads - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/as_vvadd/dataset.h b/mt/as_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/as_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 
3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 
16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 
10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 
1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 
18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 
4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 
14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 
25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 
23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 
16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 
15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/as_vvadd/vvadd_gendata.pl b/mt/as_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/as_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - 
print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/at_matmul.c b/mt/at_matmul.c new file mode 100755 index 0000000..1e4eeb3 --- /dev/null +++ b/mt/at_matmul.c @@ -0,0 +1,164 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + // ***************************** // + // **** ADD YOUR CODE HERE ***** // + // ***************************** // + // + // feel free to make a separate function for MI and MSI versions. 
+ + int i, j, k; + + /*547287 + for ( i = coreid*lda/ncores; i < (coreid+1)*lda/ncores; i++ ) + { + for ( j = 0; j < lda; j++ ) + { + int aIndex = j*lda; + int cIndex = i + aIndex; + C[cIndex] += A[aIndex] * B[i]; + C[cIndex] += A[aIndex + 1] * B[1*lda + i]; + C[cIndex] += A[aIndex + 2] * B[2*lda + i]; + C[cIndex] += A[aIndex + 3] * B[3*lda + i]; + C[cIndex] += A[aIndex + 4] * B[4*lda + i]; + C[cIndex] += A[aIndex + 5] * B[5*lda + i]; + C[cIndex] += A[aIndex + 6] * B[6*lda + i]; + C[cIndex] += A[aIndex + 7] * B[7*lda + i]; + C[cIndex] += A[aIndex + 8] * B[8*lda + i]; + C[cIndex] += A[aIndex + 9] * B[9*lda + i]; + C[cIndex] += A[aIndex + 10] * B[10*lda + i]; + C[cIndex] += A[aIndex + 11] * B[11*lda + i]; + C[cIndex] += A[aIndex + 12] * B[12*lda + i]; + C[cIndex] += A[aIndex + 13] * B[13*lda + i]; + C[cIndex] += A[aIndex + 14] * B[14*lda + i]; + C[cIndex] += A[aIndex + 15] * B[15*lda + i]; + C[cIndex] += A[aIndex + 16] * B[16*lda + i]; + C[cIndex] += A[aIndex + 17] * B[17*lda + i]; + C[cIndex] += A[aIndex + 18] * B[18*lda + i]; + C[cIndex] += A[aIndex + 19] * B[19*lda + i]; + C[cIndex] += A[aIndex + 20] * B[20*lda + i]; + C[cIndex] += A[aIndex + 21] * B[21*lda + i]; + C[cIndex] += A[aIndex + 22] * B[22*lda + i]; + C[cIndex] += A[aIndex + 23] * B[23*lda + i]; + C[cIndex] += A[aIndex + 24] * B[24*lda + i]; + C[cIndex] += A[aIndex + 25] * B[25*lda + i]; + C[cIndex] += A[aIndex + 26] * B[26*lda + i]; + C[cIndex] += A[aIndex + 27] * B[27*lda + i]; + C[cIndex] += A[aIndex + 28] * B[28*lda + i]; + C[cIndex] += A[aIndex + 29] * B[29*lda + i]; + C[cIndex] += A[aIndex + 30] * B[30*lda + i]; + C[cIndex] += A[aIndex + 31] * B[31*lda + i]; + } + } + */ + + //492827 + /* for ( i = coreid*lda/ncores; i < (coreid+1)*lda/ncores; i++ ) + { + for ( j = 0; j < lda; j++ ) + { + + int aIndex = j*lda; + int cIndex = i + aIndex; + for ( k = 0; k < lda; k++) + { + C[cIndex] += A[aIndex + k] * B[k*lda + i]; + /* C[cIndex] += A[aIndex + k+1] * B[(k+1)*lda + i]; + C[cIndex] += A[aIndex + k+2] * 
B[(k+2)*lda + i]; + C[cIndex] += A[aIndex + k+3] * B[(k+3)*lda + i]; + C[cIndex] += A[aIndex + k+4] * B[(k+4)*lda + i]; + C[cIndex] += A[aIndex + k+5] * B[(k+5)*lda + i]; + C[cIndex] += A[aIndex + k+6] * B[(k+6)*lda + i]; + C[cIndex] += A[aIndex + k+7] * B[(k+7)*lda + i]; + C[cIndex] += A[aIndex + k+8] * B[(k+8)*lda + i]; + C[cIndex] += A[aIndex + k+9] * B[(k+9)*lda + i]; + C[cIndex] += A[aIndex + k+10] * B[(k+10)*lda + i]; + C[cIndex] += A[aIndex + k+11] * B[(k+11)*lda + i]; + C[cIndex] += A[aIndex + k+12] * B[(k+12)*lda + i]; + C[cIndex] += A[aIndex + k+13] * B[(k+13)*lda + i]; + C[cIndex] += A[aIndex + k+14] * B[(k+14)*lda + i]; + C[cIndex] += A[aIndex + k+15] * B[(k+15)*lda + i];*/ + /* } + } + }*/ + /* + //326378 + data_t bTrans[1024]; + + for (int counti = 0; counti < 32; counti++) { + for (int countj = 0; countj < 32; countj++) { + *(bTrans + counti + countj*lda) = *(B + countj + counti*lda); + } + } + + + int BLOCKSIZE = 8; + for ( i = 0; i < lda; i+=BLOCKSIZE ) + { + for ( int iTemp = i; iTemp < i + BLOCKSIZE; iTemp++ ) { + int iFlag = iTemp*lda; + for ( j = coreid*lda/ncores; j < (coreid+1)*lda/ncores; j++ ) { + int jFlag = j*lda; + int cLoc = jFlag+iTemp; + for ( k = 0; k < lda; k+=8) { + *(C+cLoc) += *(A+jFlag+k) * *(bTrans+iFlag+k); + *(C+cLoc) += *(A+jFlag+k+1) * *(bTrans+iFlag+k+1); + *(C+cLoc) += *(A+jFlag+k+2) * *(bTrans+iFlag+k+2); + *(C+cLoc) += *(A+jFlag+k+3) * *(bTrans+iFlag+k+3); + *(C+cLoc) += *(A+jFlag+k+4) * *(bTrans+iFlag+k+4); + *(C+cLoc) += *(A+jFlag+k+5) * *(bTrans+iFlag+k+5); + *(C+cLoc) += *(A+jFlag+k+6) * *(bTrans+iFlag+k+6); + *(C+cLoc) += *(A+jFlag+k+7) * *(bTrans+iFlag+k+7); + } + } + } + }*/ + data_t bTrans[1024]; + + for (int counti = 0; counti < 32; counti++) { + for (int countj = 0; countj < 32; countj++) { + *(bTrans + counti + countj*lda) = *(B + countj + counti*lda); + } + } + + + int BLOCKSIZE = 8; + for ( j = 0; j < lda; j++ ) + { + //for ( int jTemp = j; jTemp < j + BLOCKSIZE; jTemp++ ) { + int jFlag = j*lda; + for ( i = 
coreid*lda/ncores; i < (coreid+1)*lda/ncores; i+=BLOCKSIZE ) { + for ( int iTemp = i; iTemp < i + BLOCKSIZE; iTemp++ ) { + + int iFlag = iTemp*lda; + int cLoc = jFlag+iTemp; + for ( k = 0; k < lda; k+=16) { + *(C+cLoc) += *(A+jFlag+k) * *(bTrans+iFlag+k); + *(C+cLoc) += *(A+jFlag+k+1) * *(bTrans+iFlag+k+1); + *(C+cLoc) += *(A+jFlag+k+2) * *(bTrans+iFlag+k+2); + *(C+cLoc) += *(A+jFlag+k+3) * *(bTrans+iFlag+k+3); + *(C+cLoc) += *(A+jFlag+k+4) * *(bTrans+iFlag+k+4); + *(C+cLoc) += *(A+jFlag+k+5) * *(bTrans+iFlag+k+5); + *(C+cLoc) += *(A+jFlag+k+6) * *(bTrans+iFlag+k+6); + *(C+cLoc) += *(A+jFlag+k+7) * *(bTrans+iFlag+k+7); + *(C+cLoc) += *(A+jFlag+k+8) * *(bTrans+iFlag+k+8); + *(C+cLoc) += *(A+jFlag+k+9) * *(bTrans+iFlag+k+9); + *(C+cLoc) += *(A+jFlag+k+10) * *(bTrans+iFlag+k+10); + *(C+cLoc) += *(A+jFlag+k+11) * *(bTrans+iFlag+k+11); + *(C+cLoc) += *(A+jFlag+k+12) * *(bTrans+iFlag+k+12); + *(C+cLoc) += *(A+jFlag+k+13) * *(bTrans+iFlag+k+13); + *(C+cLoc) += *(A+jFlag+k+14) * *(bTrans+iFlag+k+14); + *(C+cLoc) += *(A+jFlag+k+15) * *(bTrans+iFlag+k+15); + } + } + } + //} + } + + +} diff --git a/mt/at_matmul/at_matmul.c b/mt/at_matmul/at_matmul.c deleted file mode 100755 index ccda17a..0000000 --- a/mt/at_matmul/at_matmul.c +++ /dev/null @@ -1,317 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ -unsigned long _c = -rdcycle(), _i = -rdinstret(); \ -code; \ -_c += rdcycle(), _i += rdinstret(); \ -if (coreid == 0) \ -printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ -stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ -} while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - { - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + 
i]; - } - } - } -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - - int i, j, k; - - /*547287 - for ( i = coreid*lda/ncores; i < (coreid+1)*lda/ncores; i++ ) - { - for ( j = 0; j < lda; j++ ) - { - int aIndex = j*lda; - int cIndex = i + aIndex; - C[cIndex] += A[aIndex] * B[i]; - C[cIndex] += A[aIndex + 1] * B[1*lda + i]; - C[cIndex] += A[aIndex + 2] * B[2*lda + i]; - C[cIndex] += A[aIndex + 3] * B[3*lda + i]; - C[cIndex] += A[aIndex + 4] * B[4*lda + i]; - C[cIndex] += A[aIndex + 5] * B[5*lda + i]; - C[cIndex] += A[aIndex + 6] * B[6*lda + i]; - C[cIndex] += A[aIndex + 7] * B[7*lda + i]; - C[cIndex] += A[aIndex + 8] * B[8*lda + i]; - C[cIndex] += A[aIndex + 9] * B[9*lda + i]; - C[cIndex] += A[aIndex + 10] * B[10*lda + i]; - C[cIndex] += A[aIndex + 11] * B[11*lda + i]; - C[cIndex] += A[aIndex + 12] * B[12*lda + i]; - C[cIndex] += A[aIndex + 13] * B[13*lda + i]; - C[cIndex] += A[aIndex + 14] * B[14*lda + i]; - C[cIndex] += A[aIndex + 15] * B[15*lda + i]; - C[cIndex] += A[aIndex + 16] * B[16*lda + i]; - C[cIndex] += A[aIndex + 17] * B[17*lda + i]; - C[cIndex] += A[aIndex + 18] * B[18*lda + i]; - C[cIndex] += A[aIndex + 19] * B[19*lda + i]; - C[cIndex] += A[aIndex + 20] * B[20*lda + i]; - C[cIndex] += A[aIndex + 21] * B[21*lda + i]; - C[cIndex] += A[aIndex + 22] * B[22*lda + i]; - C[cIndex] += A[aIndex + 23] * B[23*lda + i]; - C[cIndex] += A[aIndex + 24] * B[24*lda + i]; - C[cIndex] += A[aIndex + 25] * B[25*lda + i]; - C[cIndex] += A[aIndex + 26] * B[26*lda + i]; - C[cIndex] += A[aIndex + 27] * B[27*lda + i]; - C[cIndex] += A[aIndex + 28] * B[28*lda + i]; - C[cIndex] += A[aIndex + 29] * B[29*lda + i]; - C[cIndex] += A[aIndex + 30] * B[30*lda + i]; - C[cIndex] += A[aIndex + 31] * B[31*lda + i]; - } - } - */ - - //492827 
- /* for ( i = coreid*lda/ncores; i < (coreid+1)*lda/ncores; i++ ) - { - for ( j = 0; j < lda; j++ ) - { - - int aIndex = j*lda; - int cIndex = i + aIndex; - for ( k = 0; k < lda; k++) - { - C[cIndex] += A[aIndex + k] * B[k*lda + i]; - /* C[cIndex] += A[aIndex + k+1] * B[(k+1)*lda + i]; - C[cIndex] += A[aIndex + k+2] * B[(k+2)*lda + i]; - C[cIndex] += A[aIndex + k+3] * B[(k+3)*lda + i]; - C[cIndex] += A[aIndex + k+4] * B[(k+4)*lda + i]; - C[cIndex] += A[aIndex + k+5] * B[(k+5)*lda + i]; - C[cIndex] += A[aIndex + k+6] * B[(k+6)*lda + i]; - C[cIndex] += A[aIndex + k+7] * B[(k+7)*lda + i]; - C[cIndex] += A[aIndex + k+8] * B[(k+8)*lda + i]; - C[cIndex] += A[aIndex + k+9] * B[(k+9)*lda + i]; - C[cIndex] += A[aIndex + k+10] * B[(k+10)*lda + i]; - C[cIndex] += A[aIndex + k+11] * B[(k+11)*lda + i]; - C[cIndex] += A[aIndex + k+12] * B[(k+12)*lda + i]; - C[cIndex] += A[aIndex + k+13] * B[(k+13)*lda + i]; - C[cIndex] += A[aIndex + k+14] * B[(k+14)*lda + i]; - C[cIndex] += A[aIndex + k+15] * B[(k+15)*lda + i];*/ - /* } - } - }*/ - /* - //326378 - data_t bTrans[1024]; - - for (int counti = 0; counti < 32; counti++) { - for (int countj = 0; countj < 32; countj++) { - *(bTrans + counti + countj*lda) = *(B + countj + counti*lda); - } - } - - - int BLOCKSIZE = 8; - for ( i = 0; i < lda; i+=BLOCKSIZE ) - { - for ( int iTemp = i; iTemp < i + BLOCKSIZE; iTemp++ ) { - int iFlag = iTemp*lda; - for ( j = coreid*lda/ncores; j < (coreid+1)*lda/ncores; j++ ) { - int jFlag = j*lda; - int cLoc = jFlag+iTemp; - for ( k = 0; k < lda; k+=8) { - *(C+cLoc) += *(A+jFlag+k) * *(bTrans+iFlag+k); - *(C+cLoc) += *(A+jFlag+k+1) * *(bTrans+iFlag+k+1); - *(C+cLoc) += *(A+jFlag+k+2) * *(bTrans+iFlag+k+2); - *(C+cLoc) += *(A+jFlag+k+3) * *(bTrans+iFlag+k+3); - *(C+cLoc) += *(A+jFlag+k+4) * *(bTrans+iFlag+k+4); - *(C+cLoc) += *(A+jFlag+k+5) * *(bTrans+iFlag+k+5); - *(C+cLoc) += *(A+jFlag+k+6) * *(bTrans+iFlag+k+6); - *(C+cLoc) += *(A+jFlag+k+7) * *(bTrans+iFlag+k+7); - } - } - } - }*/ - data_t bTrans[1024]; 
- - for (int counti = 0; counti < 32; counti++) { - for (int countj = 0; countj < 32; countj++) { - *(bTrans + counti + countj*lda) = *(B + countj + counti*lda); - } - } - - - int BLOCKSIZE = 8; - for ( j = 0; j < lda; j++ ) - { - //for ( int jTemp = j; jTemp < j + BLOCKSIZE; jTemp++ ) { - int jFlag = j*lda; - for ( i = coreid*lda/ncores; i < (coreid+1)*lda/ncores; i+=BLOCKSIZE ) { - for ( int iTemp = i; iTemp < i + BLOCKSIZE; iTemp++ ) { - - int iFlag = iTemp*lda; - int cLoc = jFlag+iTemp; - for ( k = 0; k < lda; k+=16) { - *(C+cLoc) += *(A+jFlag+k) * *(bTrans+iFlag+k); - *(C+cLoc) += *(A+jFlag+k+1) * *(bTrans+iFlag+k+1); - *(C+cLoc) += *(A+jFlag+k+2) * *(bTrans+iFlag+k+2); - *(C+cLoc) += *(A+jFlag+k+3) * *(bTrans+iFlag+k+3); - *(C+cLoc) += *(A+jFlag+k+4) * *(bTrans+iFlag+k+4); - *(C+cLoc) += *(A+jFlag+k+5) * *(bTrans+iFlag+k+5); - *(C+cLoc) += *(A+jFlag+k+6) * *(bTrans+iFlag+k+6); - *(C+cLoc) += *(A+jFlag+k+7) * *(bTrans+iFlag+k+7); - *(C+cLoc) += *(A+jFlag+k+8) * *(bTrans+iFlag+k+8); - *(C+cLoc) += *(A+jFlag+k+9) * *(bTrans+iFlag+k+9); - *(C+cLoc) += *(A+jFlag+k+10) * *(bTrans+iFlag+k+10); - *(C+cLoc) += *(A+jFlag+k+11) * *(bTrans+iFlag+k+11); - *(C+cLoc) += *(A+jFlag+k+12) * *(bTrans+iFlag+k+12); - *(C+cLoc) += *(A+jFlag+k+13) * *(bTrans+iFlag+k+13); - *(C+cLoc) += *(A+jFlag+k+14) * *(bTrans+iFlag+k+14); - *(C+cLoc) += *(A+jFlag+k+15) * *(bTrans+iFlag+k+15); - } - } - } - //} - } - - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/at_matmul/dataset.h b/mt/at_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/at_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 
3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 
0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 
0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 
68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 
61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/at_matmul/matmul_gendata.pl b/mt/at_matmul/matmul_gendata.pl deleted file mode 100755 index 
f21bb46..0000000 --- a/mt/at_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print 
sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/at_matmul/matmul_mi.c b/mt/at_matmul/matmul_mi.c deleted file mode 100644 index 640c32d..0000000 --- a/mt/at_matmul/matmul_mi.c +++ /dev/null @@ -1,317 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: Anirudh Garg -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ -unsigned long _c = -rdcycle(), _i = -rdinstret(); \ -code; \ -_c += rdcycle(), _i += rdinstret(); \ -if (coreid == 0) \ -printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ -stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ -} while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); 
- for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - { - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - } -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. 
- - int i, j, k; - - /*547287 - for ( i = coreid*lda/ncores; i < (coreid+1)*lda/ncores; i++ ) - { - for ( j = 0; j < lda; j++ ) - { - int aIndex = j*lda; - int cIndex = i + aIndex; - C[cIndex] += A[aIndex] * B[i]; - C[cIndex] += A[aIndex + 1] * B[1*lda + i]; - C[cIndex] += A[aIndex + 2] * B[2*lda + i]; - C[cIndex] += A[aIndex + 3] * B[3*lda + i]; - C[cIndex] += A[aIndex + 4] * B[4*lda + i]; - C[cIndex] += A[aIndex + 5] * B[5*lda + i]; - C[cIndex] += A[aIndex + 6] * B[6*lda + i]; - C[cIndex] += A[aIndex + 7] * B[7*lda + i]; - C[cIndex] += A[aIndex + 8] * B[8*lda + i]; - C[cIndex] += A[aIndex + 9] * B[9*lda + i]; - C[cIndex] += A[aIndex + 10] * B[10*lda + i]; - C[cIndex] += A[aIndex + 11] * B[11*lda + i]; - C[cIndex] += A[aIndex + 12] * B[12*lda + i]; - C[cIndex] += A[aIndex + 13] * B[13*lda + i]; - C[cIndex] += A[aIndex + 14] * B[14*lda + i]; - C[cIndex] += A[aIndex + 15] * B[15*lda + i]; - C[cIndex] += A[aIndex + 16] * B[16*lda + i]; - C[cIndex] += A[aIndex + 17] * B[17*lda + i]; - C[cIndex] += A[aIndex + 18] * B[18*lda + i]; - C[cIndex] += A[aIndex + 19] * B[19*lda + i]; - C[cIndex] += A[aIndex + 20] * B[20*lda + i]; - C[cIndex] += A[aIndex + 21] * B[21*lda + i]; - C[cIndex] += A[aIndex + 22] * B[22*lda + i]; - C[cIndex] += A[aIndex + 23] * B[23*lda + i]; - C[cIndex] += A[aIndex + 24] * B[24*lda + i]; - C[cIndex] += A[aIndex + 25] * B[25*lda + i]; - C[cIndex] += A[aIndex + 26] * B[26*lda + i]; - C[cIndex] += A[aIndex + 27] * B[27*lda + i]; - C[cIndex] += A[aIndex + 28] * B[28*lda + i]; - C[cIndex] += A[aIndex + 29] * B[29*lda + i]; - C[cIndex] += A[aIndex + 30] * B[30*lda + i]; - C[cIndex] += A[aIndex + 31] * B[31*lda + i]; - } - } - */ - - //492827 - /* for ( i = coreid*lda/ncores; i < (coreid+1)*lda/ncores; i++ ) - { - for ( j = 0; j < lda; j++ ) - { - - int aIndex = j*lda; - int cIndex = i + aIndex; - for ( k = 0; k < lda; k++) - { - C[cIndex] += A[aIndex + k] * B[k*lda + i]; - /* C[cIndex] += A[aIndex + k+1] * B[(k+1)*lda + i]; - C[cIndex] += A[aIndex + k+2] * 
B[(k+2)*lda + i]; - C[cIndex] += A[aIndex + k+3] * B[(k+3)*lda + i]; - C[cIndex] += A[aIndex + k+4] * B[(k+4)*lda + i]; - C[cIndex] += A[aIndex + k+5] * B[(k+5)*lda + i]; - C[cIndex] += A[aIndex + k+6] * B[(k+6)*lda + i]; - C[cIndex] += A[aIndex + k+7] * B[(k+7)*lda + i]; - C[cIndex] += A[aIndex + k+8] * B[(k+8)*lda + i]; - C[cIndex] += A[aIndex + k+9] * B[(k+9)*lda + i]; - C[cIndex] += A[aIndex + k+10] * B[(k+10)*lda + i]; - C[cIndex] += A[aIndex + k+11] * B[(k+11)*lda + i]; - C[cIndex] += A[aIndex + k+12] * B[(k+12)*lda + i]; - C[cIndex] += A[aIndex + k+13] * B[(k+13)*lda + i]; - C[cIndex] += A[aIndex + k+14] * B[(k+14)*lda + i]; - C[cIndex] += A[aIndex + k+15] * B[(k+15)*lda + i];*/ - /* } - } - }*/ - /* - //326378 - data_t bTrans[1024]; - - for (int counti = 0; counti < 32; counti++) { - for (int countj = 0; countj < 32; countj++) { - *(bTrans + counti + countj*lda) = *(B + countj + counti*lda); - } - } - - - int BLOCKSIZE = 8; - for ( i = 0; i < lda; i+=BLOCKSIZE ) - { - for ( int iTemp = i; iTemp < i + BLOCKSIZE; iTemp++ ) { - int iFlag = iTemp*lda; - for ( j = coreid*lda/ncores; j < (coreid+1)*lda/ncores; j++ ) { - int jFlag = j*lda; - int cLoc = jFlag+iTemp; - for ( k = 0; k < lda; k+=8) { - *(C+cLoc) += *(A+jFlag+k) * *(bTrans+iFlag+k); - *(C+cLoc) += *(A+jFlag+k+1) * *(bTrans+iFlag+k+1); - *(C+cLoc) += *(A+jFlag+k+2) * *(bTrans+iFlag+k+2); - *(C+cLoc) += *(A+jFlag+k+3) * *(bTrans+iFlag+k+3); - *(C+cLoc) += *(A+jFlag+k+4) * *(bTrans+iFlag+k+4); - *(C+cLoc) += *(A+jFlag+k+5) * *(bTrans+iFlag+k+5); - *(C+cLoc) += *(A+jFlag+k+6) * *(bTrans+iFlag+k+6); - *(C+cLoc) += *(A+jFlag+k+7) * *(bTrans+iFlag+k+7); - } - } - } - }*/ - data_t bTrans[1024]; - - for (int counti = coreid*32/ncores; counti < (coreid+1)*lda/ncores; counti++) { - for (int countj = 0; countj < 32; countj++) { - *(bTrans + counti + countj*lda) = *(B + countj + counti*lda); - } - } - - - int BLOCKSIZE = 8; - for ( j = 0; j < lda; j++ ) - { - //for ( int jTemp = j; jTemp < j + BLOCKSIZE; jTemp++ ) 
{ - int jFlag = j*lda; - for ( i = coreid*lda/ncores; i < (coreid+1)*lda/ncores; i+=BLOCKSIZE ) { - for ( int iTemp = i; iTemp < i + BLOCKSIZE; iTemp++ ) { - - int iFlag = iTemp*lda; - int cLoc = jFlag+iTemp; - for ( k = 0; k < lda; k+=16) { - *(C+cLoc) += *(A+jFlag+k) * *(bTrans+iFlag+k); - *(C+cLoc) += *(A+jFlag+k+1) * *(bTrans+iFlag+k+1); - *(C+cLoc) += *(A+jFlag+k+2) * *(bTrans+iFlag+k+2); - *(C+cLoc) += *(A+jFlag+k+3) * *(bTrans+iFlag+k+3); - *(C+cLoc) += *(A+jFlag+k+4) * *(bTrans+iFlag+k+4); - *(C+cLoc) += *(A+jFlag+k+5) * *(bTrans+iFlag+k+5); - *(C+cLoc) += *(A+jFlag+k+6) * *(bTrans+iFlag+k+6); - *(C+cLoc) += *(A+jFlag+k+7) * *(bTrans+iFlag+k+7); - *(C+cLoc) += *(A+jFlag+k+8) * *(bTrans+iFlag+k+8); - *(C+cLoc) += *(A+jFlag+k+9) * *(bTrans+iFlag+k+9); - *(C+cLoc) += *(A+jFlag+k+10) * *(bTrans+iFlag+k+10); - *(C+cLoc) += *(A+jFlag+k+11) * *(bTrans+iFlag+k+11); - *(C+cLoc) += *(A+jFlag+k+12) * *(bTrans+iFlag+k+12); - *(C+cLoc) += *(A+jFlag+k+13) * *(bTrans+iFlag+k+13); - *(C+cLoc) += *(A+jFlag+k+14) * *(bTrans+iFlag+k+14); - *(C+cLoc) += *(A+jFlag+k+15) * *(bTrans+iFlag+k+15); - } - } - } - //} - } - - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/at_vvadd/at_vvadd.c b/mt/at_vvadd/at_vvadd.c deleted file mode 100755 index b271d67..0000000 --- a/mt/at_vvadd/at_vvadd.c +++ /dev/null @@ -1,179 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - - - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // 
***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - - size_t i; - - // interleave accesses - for (i = (coreid*n)/ncores; i < ((coreid+1)*n)/ncores; i++) - { - - - x[i] = x[i] + y[i]; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/at_vvadd/dataset.h b/mt/at_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/at_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 
18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 
14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 
6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 
18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 
18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 
1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 
2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 
23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 
27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 
3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 
26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/at_vvadd/vvadd_gendata.pl b/mt/at_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/at_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } 
- } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/av_matmul.c b/mt/av_matmul.c new file mode 100644 index 0000000..0f0dbae --- /dev/null +++ b/mt/av_matmul.c @@ -0,0 +1,259 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + //-----------------------------------------------------------------version 2.16, optimize v2.15 get rid of tempb. MSI 83K.w/ test one 81K. 
+ + + static __thread data_t TempA[8]; + static __thread data_t TempB[8]; + static __thread data_t TempC[8]; + static __thread int j,m,n; + + if(coreid == 1 || ncores == 1 ) + { + for ( j = 16; j < 32; j++ ) + { + + for ( m = 0; m < 4; m++ ) + { + + TempA[0] = A[j*lda+0+8*m]; + TempA[1] = A[j*lda+1+8*m]; + TempA[2] = A[j*lda+2+8*m]; + TempA[3] = A[j*lda+3+8*m]; + TempA[4] = A[j*lda+4+8*m]; + TempA[5] = A[j*lda+5+8*m]; + TempA[6] = A[j*lda+6+8*m]; + TempA[7] = A[j*lda+7+8*m]; + + + + for( n = 0; n < 4; n++) + { + + + + + + TempC[0] = TempA[0] * B[(0+8*m)*lda+0+8*n]; + TempC[1] = TempA[0] * B[(0+8*m)*lda+1+8*n]; + TempC[2] = TempA[0] * B[(0+8*m)*lda+2+8*n]; + TempC[3] = TempA[0] * B[(0+8*m)*lda+3+8*n]; + TempC[4] = TempA[0] * B[(0+8*m)*lda+4+8*n]; + TempC[5] = TempA[0] * B[(0+8*m)*lda+5+8*n]; + TempC[6] = TempA[0] * B[(0+8*m)*lda+6+8*n]; + TempC[7] = TempA[0] * B[(0+8*m)*lda+7+8*n]; + + + TempC[0] += TempA[1] * B[(1+8*m)*lda+0+8*n]; + TempC[1] += TempA[1] * B[(1+8*m)*lda+1+8*n]; + TempC[2] += TempA[1] * B[(1+8*m)*lda+2+8*n]; + TempC[3] += TempA[1] * B[(1+8*m)*lda+3+8*n]; + TempC[4] += TempA[1] * B[(1+8*m)*lda+4+8*n]; + TempC[5] += TempA[1] * B[(1+8*m)*lda+5+8*n]; + TempC[6] += TempA[1] * B[(1+8*m)*lda+6+8*n]; + TempC[7] += TempA[1] * B[(1+8*m)*lda+7+8*n]; + + + + TempC[0] += TempA[2] * B[(2+8*m)*lda+0+8*n]; + TempC[1] += TempA[2] * B[(2+8*m)*lda+1+8*n]; + TempC[2] += TempA[2] * B[(2+8*m)*lda+2+8*n]; + TempC[3] += TempA[2] * B[(2+8*m)*lda+3+8*n]; + TempC[4] += TempA[2] * B[(2+8*m)*lda+4+8*n]; + TempC[5] += TempA[2] * B[(2+8*m)*lda+5+8*n]; + TempC[6] += TempA[2] * B[(2+8*m)*lda+6+8*n]; + TempC[7] += TempA[2] * B[(2+8*m)*lda+7+8*n]; + + + + TempC[0] += TempA[3] * B[(3+8*m)*lda+0+8*n]; + TempC[1] += TempA[3] * B[(3+8*m)*lda+1+8*n]; + TempC[2] += TempA[3] * B[(3+8*m)*lda+2+8*n]; + TempC[3] += TempA[3] * B[(3+8*m)*lda+3+8*n]; + TempC[4] += TempA[3] * B[(3+8*m)*lda+4+8*n]; + TempC[5] += TempA[3] * B[(3+8*m)*lda+5+8*n]; + TempC[6] += TempA[3] * B[(3+8*m)*lda+6+8*n]; + 
TempC[7] += TempA[3] * B[(3+8*m)*lda+7+8*n]; + + TempC[0] += TempA[4] * B[(4+8*m)*lda+0+8*n]; + TempC[1] += TempA[4] * B[(4+8*m)*lda+1+8*n]; + TempC[2] += TempA[4] * B[(4+8*m)*lda+2+8*n]; + TempC[3] += TempA[4] * B[(4+8*m)*lda+3+8*n]; + TempC[4] += TempA[4] * B[(4+8*m)*lda+4+8*n]; + TempC[5] += TempA[4] * B[(4+8*m)*lda+5+8*n]; + TempC[6] += TempA[4] * B[(4+8*m)*lda+6+8*n]; + TempC[7] += TempA[4] * B[(4+8*m)*lda+7+8*n]; + + + TempC[0] += TempA[5] * B[(5+8*m)*lda+0+8*n]; + TempC[1] += TempA[5] * B[(5+8*m)*lda+1+8*n]; + TempC[2] += TempA[5] * B[(5+8*m)*lda+2+8*n]; + TempC[3] += TempA[5] * B[(5+8*m)*lda+3+8*n]; + TempC[4] += TempA[5] * B[(5+8*m)*lda+4+8*n]; + TempC[5] += TempA[5] * B[(5+8*m)*lda+5+8*n]; + TempC[6] += TempA[5] * B[(5+8*m)*lda+6+8*n]; + TempC[7] += TempA[5] * B[(5+8*m)*lda+7+8*n]; + + + + TempC[0] += TempA[6] * B[(6+8*m)*lda+0+8*n]; + TempC[1] += TempA[6] * B[(6+8*m)*lda+1+8*n]; + TempC[2] += TempA[6] * B[(6+8*m)*lda+2+8*n]; + TempC[3] += TempA[6] * B[(6+8*m)*lda+3+8*n]; + TempC[4] += TempA[6] * B[(6+8*m)*lda+4+8*n]; + TempC[5] += TempA[6] * B[(6+8*m)*lda+5+8*n]; + TempC[6] += TempA[6] * B[(6+8*m)*lda+6+8*n]; + TempC[7] += TempA[6] * B[(6+8*m)*lda+7+8*n]; + + + TempC[0] += TempA[7] * B[(7+8*m)*lda+0+8*n]; + TempC[1] += TempA[7] * B[(7+8*m)*lda+1+8*n]; + TempC[2] += TempA[7] * B[(7+8*m)*lda+2+8*n]; + TempC[3] += TempA[7] * B[(7+8*m)*lda+3+8*n]; + TempC[4] += TempA[7] * B[(7+8*m)*lda+4+8*n]; + TempC[5] += TempA[7] * B[(7+8*m)*lda+5+8*n]; + TempC[6] += TempA[7] * B[(7+8*m)*lda+6+8*n]; + TempC[7] += TempA[7] * B[(7+8*m)*lda+7+8*n]; + + + + C[0+8*n+j*lda] += TempC[0]; + C[1+8*n+j*lda] += TempC[1]; + C[2+8*n+j*lda] += TempC[2]; + C[3+8*n+j*lda] += TempC[3]; + C[4+8*n+j*lda] += TempC[4]; + C[5+8*n+j*lda] += TempC[5]; + C[6+8*n+j*lda] += TempC[6]; + C[7+8*n+j*lda] += TempC[7]; + } + } + } + } + if(coreid == 0) + { + for ( j = 0; j < 16; j++ ) + { + + for ( m = 0; m < 4; m++ ) + { + + TempA[0] = A[j*lda+0+8*m]; + TempA[1] = A[j*lda+1+8*m]; + TempA[2] = 
A[j*lda+2+8*m]; + TempA[3] = A[j*lda+3+8*m]; + TempA[4] = A[j*lda+4+8*m]; + TempA[5] = A[j*lda+5+8*m]; + TempA[6] = A[j*lda+6+8*m]; + TempA[7] = A[j*lda+7+8*m]; + + + + for( n = 0; n < 4; n++) + { + + + + + + TempC[0] = TempA[0] * B[(0+8*m)*lda+0+8*n]; + TempC[1] = TempA[0] * B[(0+8*m)*lda+1+8*n]; + TempC[2] = TempA[0] * B[(0+8*m)*lda+2+8*n]; + TempC[3] = TempA[0] * B[(0+8*m)*lda+3+8*n]; + TempC[4] = TempA[0] * B[(0+8*m)*lda+4+8*n]; + TempC[5] = TempA[0] * B[(0+8*m)*lda+5+8*n]; + TempC[6] = TempA[0] * B[(0+8*m)*lda+6+8*n]; + TempC[7] = TempA[0] * B[(0+8*m)*lda+7+8*n]; + + + TempC[0] += TempA[1] * B[(1+8*m)*lda+0+8*n]; + TempC[1] += TempA[1] * B[(1+8*m)*lda+1+8*n]; + TempC[2] += TempA[1] * B[(1+8*m)*lda+2+8*n]; + TempC[3] += TempA[1] * B[(1+8*m)*lda+3+8*n]; + TempC[4] += TempA[1] * B[(1+8*m)*lda+4+8*n]; + TempC[5] += TempA[1] * B[(1+8*m)*lda+5+8*n]; + TempC[6] += TempA[1] * B[(1+8*m)*lda+6+8*n]; + TempC[7] += TempA[1] * B[(1+8*m)*lda+7+8*n]; + + + + TempC[0] += TempA[2] * B[(2+8*m)*lda+0+8*n]; + TempC[1] += TempA[2] * B[(2+8*m)*lda+1+8*n]; + TempC[2] += TempA[2] * B[(2+8*m)*lda+2+8*n]; + TempC[3] += TempA[2] * B[(2+8*m)*lda+3+8*n]; + TempC[4] += TempA[2] * B[(2+8*m)*lda+4+8*n]; + TempC[5] += TempA[2] * B[(2+8*m)*lda+5+8*n]; + TempC[6] += TempA[2] * B[(2+8*m)*lda+6+8*n]; + TempC[7] += TempA[2] * B[(2+8*m)*lda+7+8*n]; + + + + TempC[0] += TempA[3] * B[(3+8*m)*lda+0+8*n]; + TempC[1] += TempA[3] * B[(3+8*m)*lda+1+8*n]; + TempC[2] += TempA[3] * B[(3+8*m)*lda+2+8*n]; + TempC[3] += TempA[3] * B[(3+8*m)*lda+3+8*n]; + TempC[4] += TempA[3] * B[(3+8*m)*lda+4+8*n]; + TempC[5] += TempA[3] * B[(3+8*m)*lda+5+8*n]; + TempC[6] += TempA[3] * B[(3+8*m)*lda+6+8*n]; + TempC[7] += TempA[3] * B[(3+8*m)*lda+7+8*n]; + + TempC[0] += TempA[4] * B[(4+8*m)*lda+0+8*n]; + TempC[1] += TempA[4] * B[(4+8*m)*lda+1+8*n]; + TempC[2] += TempA[4] * B[(4+8*m)*lda+2+8*n]; + TempC[3] += TempA[4] * B[(4+8*m)*lda+3+8*n]; + TempC[4] += TempA[4] * B[(4+8*m)*lda+4+8*n]; + TempC[5] += TempA[4] * 
B[(4+8*m)*lda+5+8*n]; + TempC[6] += TempA[4] * B[(4+8*m)*lda+6+8*n]; + TempC[7] += TempA[4] * B[(4+8*m)*lda+7+8*n]; + + + TempC[0] += TempA[5] * B[(5+8*m)*lda+0+8*n]; + TempC[1] += TempA[5] * B[(5+8*m)*lda+1+8*n]; + TempC[2] += TempA[5] * B[(5+8*m)*lda+2+8*n]; + TempC[3] += TempA[5] * B[(5+8*m)*lda+3+8*n]; + TempC[4] += TempA[5] * B[(5+8*m)*lda+4+8*n]; + TempC[5] += TempA[5] * B[(5+8*m)*lda+5+8*n]; + TempC[6] += TempA[5] * B[(5+8*m)*lda+6+8*n]; + TempC[7] += TempA[5] * B[(5+8*m)*lda+7+8*n]; + + + + TempC[0] += TempA[6] * B[(6+8*m)*lda+0+8*n]; + TempC[1] += TempA[6] * B[(6+8*m)*lda+1+8*n]; + TempC[2] += TempA[6] * B[(6+8*m)*lda+2+8*n]; + TempC[3] += TempA[6] * B[(6+8*m)*lda+3+8*n]; + TempC[4] += TempA[6] * B[(6+8*m)*lda+4+8*n]; + TempC[5] += TempA[6] * B[(6+8*m)*lda+5+8*n]; + TempC[6] += TempA[6] * B[(6+8*m)*lda+6+8*n]; + TempC[7] += TempA[6] * B[(6+8*m)*lda+7+8*n]; + + + TempC[0] += TempA[7] * B[(7+8*m)*lda+0+8*n]; + TempC[1] += TempA[7] * B[(7+8*m)*lda+1+8*n]; + TempC[2] += TempA[7] * B[(7+8*m)*lda+2+8*n]; + TempC[3] += TempA[7] * B[(7+8*m)*lda+3+8*n]; + TempC[4] += TempA[7] * B[(7+8*m)*lda+4+8*n]; + TempC[5] += TempA[7] * B[(7+8*m)*lda+5+8*n]; + TempC[6] += TempA[7] * B[(7+8*m)*lda+6+8*n]; + TempC[7] += TempA[7] * B[(7+8*m)*lda+7+8*n]; + + + + C[0+8*n+j*lda] += TempC[0]; + C[1+8*n+j*lda] += TempC[1]; + C[2+8*n+j*lda] += TempC[2]; + C[3+8*n+j*lda] += TempC[3]; + C[4+8*n+j*lda] += TempC[4]; + C[5+8*n+j*lda] += TempC[5]; + C[6+8*n+j*lda] += TempC[6]; + C[7+8*n+j*lda] += TempC[7]; + } + } + } + } +} diff --git a/mt/av_matmul/av_matmul.c b/mt/av_matmul/av_matmul.c deleted file mode 100644 index ad5ccdb..0000000 --- a/mt/av_matmul/av_matmul.c +++ /dev/null @@ -1,2902 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays 
together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const 
data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - - //-------------------------------------------------------------first working version best 500k - /* - static __thread int i, j, k; - if(coreid == 0) - { - for ( j = 0; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - for ( i = 0; i < lda; i++) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - } - } - - if(coreid ==1) - { - for ( j = 1; j < lda; j+=2 ) - { - for ( k = 0;k < lda; k++) - { - for ( i = 0; i < lda; i++) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - - } - } - } - } - */ - //-------------------------------------------------------------version1.1, take read out of inner loop,300k - /* - static __thread int i, j, k; - static __thread data_t TempA; - - if(coreid == 0) - { - for ( j = 0; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - TempA = A[j*lda + k]; - for ( i = 0; i < lda; i++) - { - C[i + j*lda] += TempA* B[k*lda + i]; - } - } - } - } - - if(coreid ==1) - { - for ( j = 1; j < lda; j+=2 ) - { - for ( k = 0;k < lda; k++) - { - TempA = A[j*lda + k]; - for ( i = 0; i < lda; i++) - { - C[i + j*lda] += TempA* B[k*lda + i]; - } - } - } - } - */ - //-------------------------------------------------------------version2.0, read 8 elements in B at one time. 
140k mi, MSI117.0k - /* - static __thread int i, j, k, m, n; - static __thread data_t TempA; - static __thread data_t TempB[8]; - - if(coreid == 0) - { - for ( j = 0; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - TempA = A[j*lda + k]; - for( n = 0; n < 4; n++) - { - - TempB[0] = B[k*lda+0+8*n]; - TempB[1] = B[k*lda+1+8*n]; - TempB[2] = B[k*lda+2+8*n]; - TempB[3] = B[k*lda+3+8*n]; - TempB[4] = B[k*lda+4+8*n]; - TempB[5] = B[k*lda+5+8*n]; - TempB[6] = B[k*lda+6+8*n]; - TempB[7] = B[k*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA * TempB[0]; - C[1+8*n+j*lda] += TempA * TempB[1]; - C[2+8*n+j*lda] += TempA * TempB[2]; - C[3+8*n+j*lda] += TempA * TempB[3]; - C[4+8*n+j*lda] += TempA * TempB[4]; - C[5+8*n+j*lda] += TempA * TempB[5]; - C[6+8*n+j*lda] += TempA * TempB[6]; - C[7+8*n+j*lda] += TempA * TempB[7]; - - } - - } - } - } - - if(coreid == 1) - { - for ( j = 1; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - TempA = A[j*lda + k]; - for( n = 0; n < 4; n++) - { - - TempB[0] = B[k*lda+0+8*n]; - TempB[1] = B[k*lda+1+8*n]; - TempB[2] = B[k*lda+2+8*n]; - TempB[3] = B[k*lda+3+8*n]; - TempB[4] = B[k*lda+4+8*n]; - TempB[5] = B[k*lda+5+8*n]; - TempB[6] = B[k*lda+6+8*n]; - TempB[7] = B[k*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA * TempB[0]; - C[1+8*n+j*lda] += TempA * TempB[1]; - C[2+8*n+j*lda] += TempA * TempB[2]; - C[3+8*n+j*lda] += TempA * TempB[3]; - C[4+8*n+j*lda] += TempA * TempB[4]; - C[5+8*n+j*lda] += TempA * TempB[5]; - C[6+8*n+j*lda] += TempA * TempB[6]; - C[7+8*n+j*lda] += TempA * TempB[7]; - - } - - } - } - } - */ - - //-------------------------------------------------------------version2.1, optimize k. 700k. bad move to v2.2. - //-------------------------------------------------------------version2.9 take off all inner loops for both cores, MSI,109K. MI 182k - //-------------------------------------------------------------version2.10 use i= j*lda inside the n loop increase speed. but not out m and n. 
tried replace first 3, get 104.9k - /* - static __thread int j, m, i,n; - static __thread data_t TempA[8]; - static __thread data_t TempB[8]; - - if(coreid == 1) - { - for ( j = 1; j < lda; j+=2 ) - { - - for ( m = 0; m < 4; m++ ) - { - - TempA[0] = A[j*lda+0+8*m]; - TempA[1] = A[j*lda+1+8*m]; - TempA[2] = A[j*lda+2+8*m]; - TempA[3] = A[j*lda+3+8*m]; - TempA[4] = A[j*lda+4+8*m]; - TempA[5] = A[j*lda+5+8*m]; - TempA[6] = A[j*lda+6+8*m]; - TempA[7] = A[j*lda+7+8*m]; - - for( n = 0; n < 4; n++) - { - i = j*lda; - - TempB[0] = B[(0+8*m)*lda+0+8*n]; - TempB[1] = B[(0+8*m)*lda+1+8*n]; - TempB[2] = B[(0+8*m)*lda+2+8*n]; - TempB[3] = B[(0+8*m)*lda+3+8*n]; - TempB[4] = B[(0+8*m)*lda+4+8*n]; - TempB[5] = B[(0+8*m)*lda+5+8*n]; - TempB[6] = B[(0+8*m)*lda+6+8*n]; - TempB[7] = B[(0+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[0] * TempB[0]; - C[1+8*n+i] += TempA[0] * TempB[1]; - C[2+8*n+i] += TempA[0] * TempB[2]; - C[3+8*n+i] += TempA[0] * TempB[3]; - C[4+8*n+i] += TempA[0] * TempB[4]; - C[5+8*n+i] += TempA[0] * TempB[5]; - C[6+8*n+i] += TempA[0] * TempB[6]; - C[7+8*n+i] += TempA[0] * TempB[7]; - - - - TempB[0] = B[(1+8*m)*lda+0+8*n]; - TempB[1] = B[(1+8*m)*lda+1+8*n]; - TempB[2] = B[(1+8*m)*lda+2+8*n]; - TempB[3] = B[(1+8*m)*lda+3+8*n]; - TempB[4] = B[(1+8*m)*lda+4+8*n]; - TempB[5] = B[(1+8*m)*lda+5+8*n]; - TempB[6] = B[(1+8*m)*lda+6+8*n]; - TempB[7] = B[(1+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[1] * TempB[0]; - C[1+8*n+i] += TempA[1] * TempB[1]; - C[2+8*n+i] += TempA[1] * TempB[2]; - C[3+8*n+i] += TempA[1] * TempB[3]; - C[4+8*n+i] += TempA[1] * TempB[4]; - C[5+8*n+i] += TempA[1] * TempB[5]; - C[6+8*n+i] += TempA[1] * TempB[6]; - C[7+8*n+i] += TempA[1] * TempB[7]; - - - - TempB[0] = B[(2+8*m)*lda+0+8*n]; - TempB[1] = B[(2+8*m)*lda+1+8*n]; - TempB[2] = B[(2+8*m)*lda+2+8*n]; - TempB[3] = B[(2+8*m)*lda+3+8*n]; - TempB[4] = B[(2+8*m)*lda+4+8*n]; - TempB[5] = B[(2+8*m)*lda+5+8*n]; - TempB[6] = B[(2+8*m)*lda+6+8*n]; - TempB[7] = B[(2+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[2] * 
TempB[0]; - C[1+8*n+i] += TempA[2] * TempB[1]; - C[2+8*n+i] += TempA[2] * TempB[2]; - C[3+8*n+i] += TempA[2] * TempB[3]; - C[4+8*n+i] += TempA[2] * TempB[4]; - C[5+8*n+i] += TempA[2] * TempB[5]; - C[6+8*n+i] += TempA[2] * TempB[6]; - C[7+8*n+i] += TempA[2] * TempB[7]; - - - - TempB[0] = B[(3+8*m)*lda+0+8*n]; - TempB[1] = B[(3+8*m)*lda+1+8*n]; - TempB[2] = B[(3+8*m)*lda+2+8*n]; - TempB[3] = B[(3+8*m)*lda+3+8*n]; - TempB[4] = B[(3+8*m)*lda+4+8*n]; - TempB[5] = B[(3+8*m)*lda+5+8*n]; - TempB[6] = B[(3+8*m)*lda+6+8*n]; - TempB[7] = B[(3+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[3] * TempB[0]; - C[1+8*n+i] += TempA[3] * TempB[1]; - C[2+8*n+i] += TempA[3] * TempB[2]; - C[3+8*n+i] += TempA[3] * TempB[3]; - C[4+8*n+i] += TempA[3] * TempB[4]; - C[5+8*n+i] += TempA[3] * TempB[5]; - C[6+8*n+i] += TempA[3] * TempB[6]; - C[7+8*n+i] += TempA[3] * TempB[7]; - - - TempB[0] = B[(4+8*m)*lda+0+8*n]; - TempB[1] = B[(4+8*m)*lda+1+8*n]; - TempB[2] = B[(4+8*m)*lda+2+8*n]; - TempB[3] = B[(4+8*m)*lda+3+8*n]; - TempB[4] = B[(4+8*m)*lda+4+8*n]; - TempB[5] = B[(4+8*m)*lda+5+8*n]; - TempB[6] = B[(4+8*m)*lda+6+8*n]; - TempB[7] = B[(4+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[4] * TempB[0]; - C[1+8*n+i] += TempA[4] * TempB[1]; - C[2+8*n+i] += TempA[4] * TempB[2]; - C[3+8*n+i] += TempA[4] * TempB[3]; - C[4+8*n+i] += TempA[4] * TempB[4]; - C[5+8*n+i] += TempA[4] * TempB[5]; - C[6+8*n+i] += TempA[4] * TempB[6]; - C[7+8*n+i] += TempA[4] * TempB[7]; - - - - TempB[0] = B[(5+8*m)*lda+0+8*n]; - TempB[1] = B[(5+8*m)*lda+1+8*n]; - TempB[2] = B[(5+8*m)*lda+2+8*n]; - TempB[3] = B[(5+8*m)*lda+3+8*n]; - TempB[4] = B[(5+8*m)*lda+4+8*n]; - TempB[5] = B[(5+8*m)*lda+5+8*n]; - TempB[6] = B[(5+8*m)*lda+6+8*n]; - TempB[7] = B[(5+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[5] * TempB[0]; - C[1+8*n+i] += TempA[5] * TempB[1]; - C[2+8*n+i] += TempA[5] * TempB[2]; - C[3+8*n+i] += TempA[5] * TempB[3]; - C[4+8*n+i] += TempA[5] * TempB[4]; - C[5+8*n+i] += TempA[5] * TempB[5]; - C[6+8*n+i] += TempA[5] * TempB[6]; - C[7+8*n+i] += 
TempA[5] * TempB[7]; - - - - TempB[0] = B[(6+8*m)*lda+0+8*n]; - TempB[1] = B[(6+8*m)*lda+1+8*n]; - TempB[2] = B[(6+8*m)*lda+2+8*n]; - TempB[3] = B[(6+8*m)*lda+3+8*n]; - TempB[4] = B[(6+8*m)*lda+4+8*n]; - TempB[5] = B[(6+8*m)*lda+5+8*n]; - TempB[6] = B[(6+8*m)*lda+6+8*n]; - TempB[7] = B[(6+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[6] * TempB[0]; - C[1+8*n+i] += TempA[6] * TempB[1]; - C[2+8*n+i] += TempA[6] * TempB[2]; - C[3+8*n+i] += TempA[6] * TempB[3]; - C[4+8*n+i] += TempA[6] * TempB[4]; - C[5+8*n+i] += TempA[6] * TempB[5]; - C[6+8*n+i] += TempA[6] * TempB[6]; - C[7+8*n+i] += TempA[6] * TempB[7]; - - - TempB[0] = B[(7+8*m)*lda+0+8*n]; - TempB[1] = B[(7+8*m)*lda+1+8*n]; - TempB[2] = B[(7+8*m)*lda+2+8*n]; - TempB[3] = B[(7+8*m)*lda+3+8*n]; - TempB[4] = B[(7+8*m)*lda+4+8*n]; - TempB[5] = B[(7+8*m)*lda+5+8*n]; - TempB[6] = B[(7+8*m)*lda+6+8*n]; - TempB[7] = B[(7+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[7] * TempB[0]; - C[1+8*n+i] += TempA[7] * TempB[1]; - C[2+8*n+i] += TempA[7] * TempB[2]; - C[3+8*n+i] += TempA[7] * TempB[3]; - C[4+8*n+i] += TempA[7] * TempB[4]; - C[5+8*n+i] += TempA[7] * TempB[5]; - C[6+8*n+i] += TempA[7] * TempB[6]; - C[7+8*n+i] += TempA[7] * TempB[7]; - } - - } - } - } - if(coreid == 0) - { - for ( j = 0; j < lda; j+=2 ) - { - - for ( m = 0; m < 4; m++ ) - { - - TempA[0] = A[j*lda+0+8*m]; - TempA[1] = A[j*lda+1+8*m]; - TempA[2] = A[j*lda+2+8*m]; - TempA[3] = A[j*lda+3+8*m]; - TempA[4] = A[j*lda+4+8*m]; - TempA[5] = A[j*lda+5+8*m]; - TempA[6] = A[j*lda+6+8*m]; - TempA[7] = A[j*lda+7+8*m]; - - for( n = 0; n < 4; n++) - { - i = j*lda; - - TempB[0] = B[(0+8*m)*lda+0+8*n]; - TempB[1] = B[(0+8*m)*lda+1+8*n]; - TempB[2] = B[(0+8*m)*lda+2+8*n]; - TempB[3] = B[(0+8*m)*lda+3+8*n]; - TempB[4] = B[(0+8*m)*lda+4+8*n]; - TempB[5] = B[(0+8*m)*lda+5+8*n]; - TempB[6] = B[(0+8*m)*lda+6+8*n]; - TempB[7] = B[(0+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[0] * TempB[0]; - C[1+8*n+i] += TempA[0] * TempB[1]; - C[2+8*n+i] += TempA[0] * TempB[2]; - C[3+8*n+i] += TempA[0] * 
TempB[3]; - C[4+8*n+i] += TempA[0] * TempB[4]; - C[5+8*n+i] += TempA[0] * TempB[5]; - C[6+8*n+i] += TempA[0] * TempB[6]; - C[7+8*n+i] += TempA[0] * TempB[7]; - - - - TempB[0] = B[(1+8*m)*lda+0+8*n]; - TempB[1] = B[(1+8*m)*lda+1+8*n]; - TempB[2] = B[(1+8*m)*lda+2+8*n]; - TempB[3] = B[(1+8*m)*lda+3+8*n]; - TempB[4] = B[(1+8*m)*lda+4+8*n]; - TempB[5] = B[(1+8*m)*lda+5+8*n]; - TempB[6] = B[(1+8*m)*lda+6+8*n]; - TempB[7] = B[(1+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[1] * TempB[0]; - C[1+8*n+i] += TempA[1] * TempB[1]; - C[2+8*n+i] += TempA[1] * TempB[2]; - C[3+8*n+i] += TempA[1] * TempB[3]; - C[4+8*n+i] += TempA[1] * TempB[4]; - C[5+8*n+i] += TempA[1] * TempB[5]; - C[6+8*n+i] += TempA[1] * TempB[6]; - C[7+8*n+i] += TempA[1] * TempB[7]; - - - - TempB[0] = B[(2+8*m)*lda+0+8*n]; - TempB[1] = B[(2+8*m)*lda+1+8*n]; - TempB[2] = B[(2+8*m)*lda+2+8*n]; - TempB[3] = B[(2+8*m)*lda+3+8*n]; - TempB[4] = B[(2+8*m)*lda+4+8*n]; - TempB[5] = B[(2+8*m)*lda+5+8*n]; - TempB[6] = B[(2+8*m)*lda+6+8*n]; - TempB[7] = B[(2+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[2] * TempB[0]; - C[1+8*n+i] += TempA[2] * TempB[1]; - C[2+8*n+i] += TempA[2] * TempB[2]; - C[3+8*n+i] += TempA[2] * TempB[3]; - C[4+8*n+i] += TempA[2] * TempB[4]; - C[5+8*n+i] += TempA[2] * TempB[5]; - C[6+8*n+i] += TempA[2] * TempB[6]; - C[7+8*n+i] += TempA[2] * TempB[7]; - - - - TempB[0] = B[(3+8*m)*lda+0+8*n]; - TempB[1] = B[(3+8*m)*lda+1+8*n]; - TempB[2] = B[(3+8*m)*lda+2+8*n]; - TempB[3] = B[(3+8*m)*lda+3+8*n]; - TempB[4] = B[(3+8*m)*lda+4+8*n]; - TempB[5] = B[(3+8*m)*lda+5+8*n]; - TempB[6] = B[(3+8*m)*lda+6+8*n]; - TempB[7] = B[(3+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[3] * TempB[0]; - C[1+8*n+i] += TempA[3] * TempB[1]; - C[2+8*n+i] += TempA[3] * TempB[2]; - C[3+8*n+i] += TempA[3] * TempB[3]; - C[4+8*n+i] += TempA[3] * TempB[4]; - C[5+8*n+i] += TempA[3] * TempB[5]; - C[6+8*n+i] += TempA[3] * TempB[6]; - C[7+8*n+i] += TempA[3] * TempB[7]; - - - TempB[0] = B[(4+8*m)*lda+0+8*n]; - TempB[1] = B[(4+8*m)*lda+1+8*n]; - TempB[2] = 
B[(4+8*m)*lda+2+8*n]; - TempB[3] = B[(4+8*m)*lda+3+8*n]; - TempB[4] = B[(4+8*m)*lda+4+8*n]; - TempB[5] = B[(4+8*m)*lda+5+8*n]; - TempB[6] = B[(4+8*m)*lda+6+8*n]; - TempB[7] = B[(4+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[4] * TempB[0]; - C[1+8*n+i] += TempA[4] * TempB[1]; - C[2+8*n+i] += TempA[4] * TempB[2]; - C[3+8*n+i] += TempA[4] * TempB[3]; - C[4+8*n+i] += TempA[4] * TempB[4]; - C[5+8*n+i] += TempA[4] * TempB[5]; - C[6+8*n+i] += TempA[4] * TempB[6]; - C[7+8*n+i] += TempA[4] * TempB[7]; - - - - TempB[0] = B[(5+8*m)*lda+0+8*n]; - TempB[1] = B[(5+8*m)*lda+1+8*n]; - TempB[2] = B[(5+8*m)*lda+2+8*n]; - TempB[3] = B[(5+8*m)*lda+3+8*n]; - TempB[4] = B[(5+8*m)*lda+4+8*n]; - TempB[5] = B[(5+8*m)*lda+5+8*n]; - TempB[6] = B[(5+8*m)*lda+6+8*n]; - TempB[7] = B[(5+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[5] * TempB[0]; - C[1+8*n+i] += TempA[5] * TempB[1]; - C[2+8*n+i] += TempA[5] * TempB[2]; - C[3+8*n+i] += TempA[5] * TempB[3]; - C[4+8*n+i] += TempA[5] * TempB[4]; - C[5+8*n+i] += TempA[5] * TempB[5]; - C[6+8*n+i] += TempA[5] * TempB[6]; - C[7+8*n+i] += TempA[5] * TempB[7]; - - - - TempB[0] = B[(6+8*m)*lda+0+8*n]; - TempB[1] = B[(6+8*m)*lda+1+8*n]; - TempB[2] = B[(6+8*m)*lda+2+8*n]; - TempB[3] = B[(6+8*m)*lda+3+8*n]; - TempB[4] = B[(6+8*m)*lda+4+8*n]; - TempB[5] = B[(6+8*m)*lda+5+8*n]; - TempB[6] = B[(6+8*m)*lda+6+8*n]; - TempB[7] = B[(6+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[6] * TempB[0]; - C[1+8*n+i] += TempA[6] * TempB[1]; - C[2+8*n+i] += TempA[6] * TempB[2]; - C[3+8*n+i] += TempA[6] * TempB[3]; - C[4+8*n+i] += TempA[6] * TempB[4]; - C[5+8*n+i] += TempA[6] * TempB[5]; - C[6+8*n+i] += TempA[6] * TempB[6]; - C[7+8*n+i] += TempA[6] * TempB[7]; - - - TempB[0] = B[(7+8*m)*lda+0+8*n]; - TempB[1] = B[(7+8*m)*lda+1+8*n]; - TempB[2] = B[(7+8*m)*lda+2+8*n]; - TempB[3] = B[(7+8*m)*lda+3+8*n]; - TempB[4] = B[(7+8*m)*lda+4+8*n]; - TempB[5] = B[(7+8*m)*lda+5+8*n]; - TempB[6] = B[(7+8*m)*lda+6+8*n]; - TempB[7] = B[(7+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[7] * TempB[0]; - C[1+8*n+i] 
+= TempA[7] * TempB[1]; - C[2+8*n+i] += TempA[7] * TempB[2]; - C[3+8*n+i] += TempA[7] * TempB[3]; - C[4+8*n+i] += TempA[7] * TempB[4]; - C[5+8*n+i] += TempA[7] * TempB[5]; - C[6+8*n+i] += TempA[7] * TempB[6]; - C[7+8*n+i] += TempA[7] * TempB[7]; - } - - } - } - } - - */ - //-------------------------------------------------------------version2.2, optimize k. from 4 instead of 8 like v2.1, random failing on MI, unknown reason, MSI,350K, take off each inner loop for core 0 260k, both cores 134k - //-------------------------------------------------------------try false sharing for core 0, 136k. - /* - static __thread int j, m, n; - static __thread data_t TempA[4]; - static __thread data_t TempB[4]; - - if(coreid == 1) - { - for ( j = 1; j < lda; j+=2 ) - { - for ( m = 0; m < 8; m++ ) - { - TempA[0] = A[j*lda+0+4*m]; - TempA[1] = A[j*lda+1+4*m]; - TempA[2] = A[j*lda+2+4*m]; - TempA[3] = A[j*lda+3+4*m]; - - for( n = 0; n < 8; n++) - { - - TempB[0] = B[(0+4*m)*lda+0+4*n]; - TempB[1] = B[(0+4*m)*lda+1+4*n]; - TempB[2] = B[(0+4*m)*lda+2+4*n]; - TempB[3] = B[(0+4*m)*lda+3+4*n]; - - - C[0+4*n+j*lda] += TempA[0] * TempB[0]; - C[1+4*n+j*lda] += TempA[0] * TempB[1]; - C[2+4*n+j*lda] += TempA[0] * TempB[2]; - C[3+4*n+j*lda] += TempA[0] * TempB[3]; - - - - - - TempB[0] = B[(1+4*m)*lda+0+4*n]; - TempB[1] = B[(1+4*m)*lda+1+4*n]; - TempB[2] = B[(1+4*m)*lda+2+4*n]; - TempB[3] = B[(1+4*m)*lda+3+4*n]; - - - C[0+4*n+j*lda] += TempA[1] * TempB[0]; - C[1+4*n+j*lda] += TempA[1] * TempB[1]; - C[2+4*n+j*lda] += TempA[1] * TempB[2]; - C[3+4*n+j*lda] += TempA[1] * TempB[3]; - - - - TempB[0] = B[(2+4*m)*lda+0+4*n]; - TempB[1] = B[(2+4*m)*lda+1+4*n]; - TempB[2] = B[(2+4*m)*lda+2+4*n]; - TempB[3] = B[(2+4*m)*lda+3+4*n]; - - - C[0+4*n+j*lda] += TempA[2] * TempB[0]; - C[1+4*n+j*lda] += TempA[2] * TempB[1]; - C[2+4*n+j*lda] += TempA[2] * TempB[2]; - C[3+4*n+j*lda] += TempA[2] * TempB[3]; - - - - - TempB[0] = B[(3+4*m)*lda+0+4*n]; - TempB[1] = B[(3+4*m)*lda+1+4*n]; - TempB[2] = B[(3+4*m)*lda+2+4*n]; - 
TempB[3] = B[(3+4*m)*lda+3+4*n]; - - - C[0+4*n+j*lda] += TempA[3] * TempB[0]; - C[1+4*n+j*lda] += TempA[3] * TempB[1]; - C[2+4*n+j*lda] += TempA[3] * TempB[2]; - C[3+4*n+j*lda] += TempA[3] * TempB[3]; - - - } - } - } - } - if(coreid == 0) - { - for ( j = 0; j < lda; j+=2 ) - { - for ( m = 0; m < 8; m++ ) - { - TempA[0] = A[j*lda+0+4*m]; - TempA[1] = A[j*lda+1+4*m]; - TempA[2] = A[j*lda+2+4*m]; - TempA[3] = A[j*lda+3+4*m]; - - for( n = 0; n < 8; n++) - { - - - - - - - - TempB[0] = B[(1+4*m)*lda+0+4*n]; - TempB[1] = B[(1+4*m)*lda+1+4*n]; - TempB[2] = B[(1+4*m)*lda+2+4*n]; - TempB[3] = B[(1+4*m)*lda+3+4*n]; - - - C[0+4*n+j*lda] += TempA[1] * TempB[0]; - C[1+4*n+j*lda] += TempA[1] * TempB[1]; - C[2+4*n+j*lda] += TempA[1] * TempB[2]; - C[3+4*n+j*lda] += TempA[1] * TempB[3]; - - - - TempB[0] = B[(2+4*m)*lda+0+4*n]; - TempB[1] = B[(2+4*m)*lda+1+4*n]; - TempB[2] = B[(2+4*m)*lda+2+4*n]; - TempB[3] = B[(2+4*m)*lda+3+4*n]; - - - C[0+4*n+j*lda] += TempA[2] * TempB[0]; - C[1+4*n+j*lda] += TempA[2] * TempB[1]; - C[2+4*n+j*lda] += TempA[2] * TempB[2]; - C[3+4*n+j*lda] += TempA[2] * TempB[3]; - - - - - TempB[0] = B[(3+4*m)*lda+0+4*n]; - TempB[1] = B[(3+4*m)*lda+1+4*n]; - TempB[2] = B[(3+4*m)*lda+2+4*n]; - TempB[3] = B[(3+4*m)*lda+3+4*n]; - - - C[0+4*n+j*lda] += TempA[3] * TempB[0]; - C[1+4*n+j*lda] += TempA[3] * TempB[1]; - C[2+4*n+j*lda] += TempA[3] * TempB[2]; - C[3+4*n+j*lda] += TempA[3] * TempB[3]; - - TempB[0] = B[(0+4*m)*lda+0+4*n]; - TempB[1] = B[(0+4*m)*lda+1+4*n]; - TempB[2] = B[(0+4*m)*lda+2+4*n]; - TempB[3] = B[(0+4*m)*lda+3+4*n]; - - - C[0+4*n+j*lda] += TempA[0] * TempB[0]; - C[1+4*n+j*lda] += TempA[0] * TempB[1]; - C[2+4*n+j*lda] += TempA[0] * TempB[2]; - C[3+4*n+j*lda] += TempA[0] * TempB[3]; - - - } - } - } - } - */ - - - - //-------------------------------------------------------------version2.3, read 8 elements in B at one time. make k to 2. 150k mi 128k msi. 
worse than v2.0 - /* - static __thread int i, j, k, m, n; - static __thread data_t TempA[2]; - static __thread data_t TempB[8]; - - if(coreid == 0) - { - for ( j = 0; j < lda; j+=2 ) - { - for ( m = 0; m < 16; m++ ) - { - TempA[0] = A[j*lda + 0 + 2*m]; - TempA[1] = A[j*lda + 1 + 2*m]; - for( n = 0; n < 4; n++) - { - - TempB[0] = B[2*m*lda+0+8*n]; - TempB[1] = B[2*m*lda+1+8*n]; - TempB[2] = B[2*m*lda+2+8*n]; - TempB[3] = B[2*m*lda+3+8*n]; - TempB[4] = B[2*m*lda+4+8*n]; - TempB[5] = B[2*m*lda+5+8*n]; - TempB[6] = B[2*m*lda+6+8*n]; - TempB[7] = B[2*m*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[0] * TempB[0]; - C[1+8*n+j*lda] += TempA[0] * TempB[1]; - C[2+8*n+j*lda] += TempA[0] * TempB[2]; - C[3+8*n+j*lda] += TempA[0] * TempB[3]; - C[4+8*n+j*lda] += TempA[0] * TempB[4]; - C[5+8*n+j*lda] += TempA[0] * TempB[5]; - C[6+8*n+j*lda] += TempA[0] * TempB[6]; - C[7+8*n+j*lda] += TempA[0] * TempB[7]; - - TempB[0] = B[(1+2*m)*lda+0+8*n]; - TempB[1] = B[(1+2*m)*lda+1+8*n]; - TempB[2] = B[(1+2*m)*lda+2+8*n]; - TempB[3] = B[(1+2*m)*lda+3+8*n]; - TempB[4] = B[(1+2*m)*lda+4+8*n]; - TempB[5] = B[(1+2*m)*lda+5+8*n]; - TempB[6] = B[(1+2*m)*lda+6+8*n]; - TempB[7] = B[(1+2*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[1] * TempB[0]; - C[1+8*n+j*lda] += TempA[1] * TempB[1]; - C[2+8*n+j*lda] += TempA[1] * TempB[2]; - C[3+8*n+j*lda] += TempA[1] * TempB[3]; - C[4+8*n+j*lda] += TempA[1] * TempB[4]; - C[5+8*n+j*lda] += TempA[1] * TempB[5]; - C[6+8*n+j*lda] += TempA[1] * TempB[6]; - C[7+8*n+j*lda] += TempA[1] * TempB[7]; - - } - - } - } - } - - if(coreid == 1) - { - for ( j = 1; j < lda; j+=2 ) - { - for ( m = 0; m < 16; m++ ) - { - TempA[0] = A[j*lda + 0 + 2*m]; - TempA[1] = A[j*lda + 1 + 2*m]; - for( n = 0; n < 4; n++) - { - - TempB[0] = B[2*m*lda+0+8*n]; - TempB[1] = B[2*m*lda+1+8*n]; - TempB[2] = B[2*m*lda+2+8*n]; - TempB[3] = B[2*m*lda+3+8*n]; - TempB[4] = B[2*m*lda+4+8*n]; - TempB[5] = B[2*m*lda+5+8*n]; - TempB[6] = B[2*m*lda+6+8*n]; - TempB[7] = B[2*m*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[0] * 
TempB[0]; - C[1+8*n+j*lda] += TempA[0] * TempB[1]; - C[2+8*n+j*lda] += TempA[0] * TempB[2]; - C[3+8*n+j*lda] += TempA[0] * TempB[3]; - C[4+8*n+j*lda] += TempA[0] * TempB[4]; - C[5+8*n+j*lda] += TempA[0] * TempB[5]; - C[6+8*n+j*lda] += TempA[0] * TempB[6]; - C[7+8*n+j*lda] += TempA[0] * TempB[7]; - - TempB[0] = B[(1+2*m)*lda+0+8*n]; - TempB[1] = B[(1+2*m)*lda+1+8*n]; - TempB[2] = B[(1+2*m)*lda+2+8*n]; - TempB[3] = B[(1+2*m)*lda+3+8*n]; - TempB[4] = B[(1+2*m)*lda+4+8*n]; - TempB[5] = B[(1+2*m)*lda+5+8*n]; - TempB[6] = B[(1+2*m)*lda+6+8*n]; - TempB[7] = B[(1+2*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[1] * TempB[0]; - C[1+8*n+j*lda] += TempA[1] * TempB[1]; - C[2+8*n+j*lda] += TempA[1] * TempB[2]; - C[3+8*n+j*lda] += TempA[1] * TempB[3]; - C[4+8*n+j*lda] += TempA[1] * TempB[4]; - C[5+8*n+j*lda] += TempA[1] * TempB[5]; - C[6+8*n+j*lda] += TempA[1] * TempB[6]; - C[7+8*n+j*lda] += TempA[1] * TempB[7]; - - } - - } - } - } - */ - //-------------------------------------------------------------version2.4, read 4 170k and 16 140k, error because not enough space elements in B at one time. 
- /* - static __thread int i, j, k, m, n; - static __thread data_t TempA; - static __thread data_t TempB[16]; - - if(coreid == 0) - { - for ( j = 0; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - TempA = A[j*lda + k]; - for( n = 0; n < 2; n++) - { - - TempB[0] = B[k*lda+0+16*n]; - TempB[1] = B[k*lda+1+16*n]; - TempB[2] = B[k*lda+2+16*n]; - TempB[3] = B[k*lda+3+16*n]; - TempB[4] = B[k*lda+4+16*n]; - TempB[5] = B[k*lda+5+16*n]; - TempB[6] = B[k*lda+6+16*n]; - TempB[7] = B[k*lda+7+16*n]; - TempB[8] = B[k*lda+8+16*n]; - TempB[9] = B[k*lda+9+16*n]; - TempB[10] = B[k*lda+10+16*n]; - TempB[11] = B[k*lda+11+16*n]; - TempB[12] = B[k*lda+12+16*n]; - TempB[13] = B[k*lda+13+16*n]; - TempB[14] = B[k*lda+14+16*n]; - TempB[15] = B[k*lda+15+16*n]; - - - C[0+16*n+j*lda] += TempA * TempB[0]; - C[1+16*n+j*lda] += TempA * TempB[1]; - C[2+16*n+j*lda] += TempA * TempB[2]; - C[3+16*n+j*lda] += TempA * TempB[3]; - C[4+16*n+j*lda] += TempA * TempB[4]; - C[5+16*n+j*lda] += TempA * TempB[5]; - C[6+16*n+j*lda] += TempA * TempB[6]; - C[7+16*n+j*lda] += TempA * TempB[7]; - C[8+16*n+j*lda] += TempA * TempB[8]; - C[9+16*n+j*lda] += TempA * TempB[9]; - C[10+16*n+j*lda] += TempA * TempB[10]; - C[11+16*n+j*lda] += TempA * TempB[11]; - C[12+16*n+j*lda] += TempA * TempB[12]; - C[13+16*n+j*lda] += TempA * TempB[13]; - C[14+16*n+j*lda] += TempA * TempB[14]; - C[15+16*n+j*lda] += TempA * TempB[15]; - - - - } - - } - } - } - if(coreid == 1) - { - for ( j = 1; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - TempA = A[j*lda + k]; - for( n = 0; n < 2; n++) - { - - TempB[0] = B[k*lda+0+16*n]; - TempB[1] = B[k*lda+1+16*n]; - TempB[2] = B[k*lda+2+16*n]; - TempB[3] = B[k*lda+3+16*n]; - TempB[4] = B[k*lda+4+16*n]; - TempB[5] = B[k*lda+5+16*n]; - TempB[6] = B[k*lda+6+16*n]; - TempB[7] = B[k*lda+7+16*n]; - TempB[8] = B[k*lda+8+16*n]; - TempB[9] = B[k*lda+9+16*n]; - TempB[10] = B[k*lda+10+16*n]; - TempB[11] = B[k*lda+11+16*n]; - TempB[12] = B[k*lda+12+16*n]; - TempB[13] = B[k*lda+13+16*n]; - 
TempB[14] = B[k*lda+14+16*n]; - TempB[15] = B[k*lda+15+16*n]; - - - C[0+16*n+j*lda] += TempA * TempB[0]; - C[1+16*n+j*lda] += TempA * TempB[1]; - C[2+16*n+j*lda] += TempA * TempB[2]; - C[3+16*n+j*lda] += TempA * TempB[3]; - C[4+16*n+j*lda] += TempA * TempB[4]; - C[5+16*n+j*lda] += TempA * TempB[5]; - C[6+16*n+j*lda] += TempA * TempB[6]; - C[7+16*n+j*lda] += TempA * TempB[7]; - C[8+16*n+j*lda] += TempA * TempB[8]; - C[9+16*n+j*lda] += TempA * TempB[9]; - C[10+16*n+j*lda] += TempA * TempB[10]; - C[11+16*n+j*lda] += TempA * TempB[11]; - C[12+16*n+j*lda] += TempA * TempB[12]; - C[13+16*n+j*lda] += TempA * TempB[13]; - C[14+16*n+j*lda] += TempA * TempB[14]; - C[15+16*n+j*lda] += TempA * TempB[15]; - - - - } - - } - } - } - - */ - //-------------------------------------------------------------version2.5, read 10 elements in B at one time. has corner cases. Turns out it hangs. - /* - static __thread int j, k, n; - static __thread data_t TempA; - static __thread data_t TempB[10]; - - if(coreid == 0) - { - for ( j = 0; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - TempA = A[j*lda + k]; - for( n = 0; n < 3; n++) - { - TempB[0] = B[k*lda+0+10*n]; - TempB[1] = B[k*lda+1+10*n]; - TempB[2] = B[k*lda+2+10*n]; - TempB[3] = B[k*lda+3+10*n]; - TempB[4] = B[k*lda+4+10*n]; - TempB[5] = B[k*lda+5+10*n]; - TempB[6] = B[k*lda+6+10*n]; - TempB[7] = B[k*lda+7+10*n]; - TempB[8] = B[k*lda+8+10*n]; - TempB[9] = B[k*lda+9+10*n]; - - C[0+10*n+j*lda] += TempA * TempB[0]; - C[1+10*n+j*lda] += TempA * TempB[1]; - C[2+10*n+j*lda] += TempA * TempB[2]; - C[3+10*n+j*lda] += TempA * TempB[3]; - C[4+10*n+j*lda] += TempA * TempB[4]; - C[5+10*n+j*lda] += TempA * TempB[5]; - C[6+10*n+j*lda] += TempA * TempB[6]; - C[7+10*n+j*lda] += TempA * TempB[7]; - C[8+10*n+j*lda] += TempA * TempB[8]; - C[9+10*n+j*lda] += TempA * TempB[9]; - } - TempB[0] = B[k*lda+30]; - TempB[1] = B[k*lda+31]; - C[30+j*lda] += TempA * TempB[0]; - C[31+j*lda] += TempA * TempB[1]; - } - } - } - if(coreid == 1) - { - for ( j = 
1; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - TempA = A[j*lda + k]; - for( n = 0; n < 3; n++) - { - TempB[0] = B[k*lda+0+10*n]; - TempB[1] = B[k*lda+1+10*n]; - TempB[2] = B[k*lda+2+10*n]; - TempB[3] = B[k*lda+3+10*n]; - TempB[4] = B[k*lda+4+10*n]; - TempB[5] = B[k*lda+5+10*n]; - TempB[6] = B[k*lda+6+10*n]; - TempB[7] = B[k*lda+7+10*n]; - TempB[8] = B[k*lda+8+10*n]; - TempB[9] = B[k*lda+9+10*n]; - - C[0+10*n+j*lda] += TempA * TempB[0]; - C[1+10*n+j*lda] += TempA * TempB[1]; - C[2+10*n+j*lda] += TempA * TempB[2]; - C[3+10*n+j*lda] += TempA * TempB[3]; - C[4+10*n+j*lda] += TempA * TempB[4]; - C[5+10*n+j*lda] += TempA * TempB[5]; - C[6+10*n+j*lda] += TempA * TempB[6]; - C[7+10*n+j*lda] += TempA * TempB[7]; - C[8+10*n+j*lda] += TempA * TempB[8]; - C[9+10*n+j*lda] += TempA * TempB[9]; - } - TempB[0] = B[k*lda+30]; - TempB[1] = B[k*lda+31]; - C[30+j*lda] += TempA * TempB[0]; - C[31+j*lda] += TempA * TempB[1]; - } - } - } - - */ - - //-------------------------------------------------------------version2.6, optimize 2.0. 
take off n loop and tried different order of reading B - /* - static __thread int j, k, n; - static __thread data_t TempA; - static __thread data_t TempB[8]; - - if(coreid == 0) - { - for ( j = 0; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - TempA = A[j*lda + k]; - - TempB[0] = B[k*lda+0]; - TempB[1] = B[k*lda+1]; - TempB[2] = B[k*lda+2]; - TempB[3] = B[k*lda+3]; - TempB[4] = B[k*lda+4]; - TempB[5] = B[k*lda+5]; - TempB[6] = B[k*lda+6]; - TempB[7] = B[k*lda+7]; - - C[0+j*lda] += TempA * TempB[0]; - C[1+j*lda] += TempA * TempB[1]; - C[2+j*lda] += TempA * TempB[2]; - C[3+j*lda] += TempA * TempB[3]; - C[4+j*lda] += TempA * TempB[4]; - C[5+j*lda] += TempA * TempB[5]; - C[6+j*lda] += TempA * TempB[6]; - C[7+j*lda] += TempA * TempB[7]; - - TempB[0] = B[k*lda+8]; - TempB[1] = B[k*lda+9]; - TempB[2] = B[k*lda+10]; - TempB[3] = B[k*lda+11]; - TempB[4] = B[k*lda+12]; - TempB[5] = B[k*lda+13]; - TempB[6] = B[k*lda+14]; - TempB[7] = B[k*lda+15]; - - C[8+j*lda] += TempA * TempB[0]; - C[9+j*lda] += TempA * TempB[1]; - C[10+j*lda] += TempA * TempB[2]; - C[11+j*lda] += TempA * TempB[3]; - C[12+j*lda] += TempA * TempB[4]; - C[13+j*lda] += TempA * TempB[5]; - C[14+j*lda] += TempA * TempB[6]; - C[15+j*lda] += TempA * TempB[7]; - - TempB[0] = B[k*lda+16]; - TempB[1] = B[k*lda+17]; - TempB[2] = B[k*lda+18]; - TempB[3] = B[k*lda+19]; - TempB[4] = B[k*lda+20]; - TempB[5] = B[k*lda+21]; - TempB[6] = B[k*lda+22]; - TempB[7] = B[k*lda+23]; - - C[16+j*lda] += TempA * TempB[0]; - C[17+j*lda] += TempA * TempB[1]; - C[18+j*lda] += TempA * TempB[2]; - C[19+j*lda] += TempA * TempB[3]; - C[20+j*lda] += TempA * TempB[4]; - C[21+j*lda] += TempA * TempB[5]; - C[22+j*lda] += TempA * TempB[6]; - C[23+j*lda] += TempA * TempB[7]; - - TempB[0] = B[k*lda+24]; - TempB[1] = B[k*lda+25]; - TempB[2] = B[k*lda+26]; - TempB[3] = B[k*lda+27]; - TempB[4] = B[k*lda+28]; - TempB[5] = B[k*lda+29]; - TempB[6] = B[k*lda+30]; - TempB[7] = B[k*lda+31]; - - C[24+j*lda] += TempA * TempB[0]; - C[25+j*lda] += 
TempA * TempB[1]; - C[26+j*lda] += TempA * TempB[2]; - C[27+j*lda] += TempA * TempB[3]; - C[28+j*lda] += TempA * TempB[4]; - C[29+j*lda] += TempA * TempB[5]; - C[30+j*lda] += TempA * TempB[6]; - C[31+j*lda] += TempA * TempB[7]; - - - - } - } - } - - if(coreid == 1) - { - for ( j = 1; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - TempA = A[j*lda + k]; - - - TempB[0] = B[k*lda+24]; - TempB[1] = B[k*lda+25]; - TempB[2] = B[k*lda+26]; - TempB[3] = B[k*lda+27]; - TempB[4] = B[k*lda+28]; - TempB[5] = B[k*lda+29]; - TempB[6] = B[k*lda+30]; - TempB[7] = B[k*lda+31]; - - C[24+j*lda] += TempA * TempB[0]; - C[25+j*lda] += TempA * TempB[1]; - C[26+j*lda] += TempA * TempB[2]; - C[27+j*lda] += TempA * TempB[3]; - C[28+j*lda] += TempA * TempB[4]; - C[29+j*lda] += TempA * TempB[5]; - C[30+j*lda] += TempA * TempB[6]; - C[31+j*lda] += TempA * TempB[7]; - - TempB[0] = B[k*lda+0]; - TempB[1] = B[k*lda+1]; - TempB[2] = B[k*lda+2]; - TempB[3] = B[k*lda+3]; - TempB[4] = B[k*lda+4]; - TempB[5] = B[k*lda+5]; - TempB[6] = B[k*lda+6]; - TempB[7] = B[k*lda+7]; - - C[0+j*lda] += TempA * TempB[0]; - C[1+j*lda] += TempA * TempB[1]; - C[2+j*lda] += TempA * TempB[2]; - C[3+j*lda] += TempA * TempB[3]; - C[4+j*lda] += TempA * TempB[4]; - C[5+j*lda] += TempA * TempB[5]; - C[6+j*lda] += TempA * TempB[6]; - C[7+j*lda] += TempA * TempB[7]; - - TempB[0] = B[k*lda+8]; - TempB[1] = B[k*lda+9]; - TempB[2] = B[k*lda+10]; - TempB[3] = B[k*lda+11]; - TempB[4] = B[k*lda+12]; - TempB[5] = B[k*lda+13]; - TempB[6] = B[k*lda+14]; - TempB[7] = B[k*lda+15]; - - C[8+j*lda] += TempA * TempB[0]; - C[9+j*lda] += TempA * TempB[1]; - C[10+j*lda] += TempA * TempB[2]; - C[11+j*lda] += TempA * TempB[3]; - C[12+j*lda] += TempA * TempB[4]; - C[13+j*lda] += TempA * TempB[5]; - C[14+j*lda] += TempA * TempB[6]; - C[15+j*lda] += TempA * TempB[7]; - - TempB[0] = B[k*lda+16]; - TempB[1] = B[k*lda+17]; - TempB[2] = B[k*lda+18]; - TempB[3] = B[k*lda+19]; - TempB[4] = B[k*lda+20]; - TempB[5] = B[k*lda+21]; - TempB[6] = 
B[k*lda+22]; - TempB[7] = B[k*lda+23]; - - C[16+j*lda] += TempA * TempB[0]; - C[17+j*lda] += TempA * TempB[1]; - C[18+j*lda] += TempA * TempB[2]; - C[19+j*lda] += TempA * TempB[3]; - C[20+j*lda] += TempA * TempB[4]; - C[21+j*lda] += TempA * TempB[5]; - C[22+j*lda] += TempA * TempB[6]; - C[23+j*lda] += TempA * TempB[7]; - - - - - - - } - } - } - */ - //-------------------------------------------------------------version2.7, use m=l*da, i=k*lda,out of stack, only i, MI 150k, only m, MSI 117.9k slower than v2.0 - /* - static __thread int i, j, k, m, n; - static __thread data_t TempA; - static __thread data_t TempB[8]; - - if(coreid == 0) - { - for ( j = 0; j < lda; j+=2 ) - { - m = j * lda; - for ( k = 0; k < lda; k++ ) - { - TempA = A[m+ k]; - for( n = 0; n < 4; n++) - { - - TempB[0] = B[k *lda+0+8*n]; - TempB[1] = B[k *lda+1+8*n]; - TempB[2] = B[k *lda+2+8*n]; - TempB[3] = B[k *lda+3+8*n]; - TempB[4] = B[k *lda+4+8*n]; - TempB[5] = B[k *lda+5+8*n]; - TempB[6] = B[k *lda+6+8*n]; - TempB[7] = B[k *lda+7+8*n]; - - C[0+8*n+m] += TempA * TempB[0]; - C[1+8*n+m] += TempA * TempB[1]; - C[2+8*n+m] += TempA * TempB[2]; - C[3+8*n+m] += TempA * TempB[3]; - C[4+8*n+m] += TempA * TempB[4]; - C[5+8*n+m] += TempA * TempB[5]; - C[6+8*n+m] += TempA * TempB[6]; - C[7+8*n+m] += TempA * TempB[7]; - - } - - } - } - } -if(coreid == 1) - { - for ( j = 1; j < lda; j+=2 ) - { - m = j * lda; - for ( k = 0; k < lda; k++ ) - { - TempA = A[m+ k]; - for( n = 0; n < 4; n++) - { - - TempB[0] = B[k *lda+0+8*n]; - TempB[1] = B[k *lda+1+8*n]; - TempB[2] = B[k *lda+2+8*n]; - TempB[3] = B[k *lda+3+8*n]; - TempB[4] = B[k *lda+4+8*n]; - TempB[5] = B[k *lda+5+8*n]; - TempB[6] = B[k *lda+6+8*n]; - TempB[7] = B[k *lda+7+8*n]; - - C[0+8*n+m] += TempA * TempB[0]; - C[1+8*n+m] += TempA * TempB[1]; - C[2+8*n+m] += TempA * TempB[2]; - C[3+8*n+m] += TempA * TempB[3]; - C[4+8*n+m] += TempA * TempB[4]; - C[5+8*n+m] += TempA * TempB[5]; - C[6+8*n+m] += TempA * TempB[6]; - C[7+8*n+m] += TempA * TempB[7]; - - } - - } - 
} - } - */ -//-------------------------------------------------------------version2.8 deal with false sharing, MSI,118K vs v2.0 117.0K. MI 147.629K. -/* -static __thread int i, j, k, m, n; - static __thread data_t TempA; - static __thread data_t TempB[8]; - - if(coreid == 0) - { - for ( j = 0; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - TempA = A[j*lda + k]; - for( n = 0; n < 2; n++) - { - - TempB[0] = B[k*lda+0+16*n]; - TempB[1] = B[k*lda+1+16*n]; - TempB[2] = B[k*lda+2+16*n]; - TempB[3] = B[k*lda+3+16*n]; - TempB[4] = B[k*lda+4+16*n]; - TempB[5] = B[k*lda+5+16*n]; - TempB[6] = B[k*lda+6+16*n]; - TempB[7] = B[k*lda+7+16*n]; - - - - C[0+16*n+j*lda] += TempA * TempB[0]; - C[1+16*n+j*lda] += TempA * TempB[1]; - C[2+16*n+j*lda] += TempA * TempB[2]; - C[3+16*n+j*lda] += TempA * TempB[3]; - C[4+16*n+j*lda] += TempA * TempB[4]; - C[5+16*n+j*lda] += TempA * TempB[5]; - C[6+16*n+j*lda] += TempA * TempB[6]; - C[7+16*n+j*lda] += TempA * TempB[7]; - - TempB[0] = B[k*lda+8+16*n]; - TempB[1] = B[k*lda+9+16*n]; - TempB[2] = B[k*lda+10+16*n]; - TempB[3] = B[k*lda+11+16*n]; - TempB[4] = B[k*lda+12+16*n]; - TempB[5] = B[k*lda+13+16*n]; - TempB[6] = B[k*lda+14+16*n]; - TempB[7] = B[k*lda+15+16*n]; - - C[8+16*n+j*lda] += TempA * TempB[0]; - C[9+16*n+j*lda] += TempA * TempB[1]; - C[10+16*n+j*lda] += TempA * TempB[2]; - C[11+16*n+j*lda] += TempA * TempB[3]; - C[12+16*n+j*lda] += TempA * TempB[4]; - C[13+16*n+j*lda] += TempA * TempB[5]; - C[14+16*n+j*lda] += TempA * TempB[6]; - C[15+16*n+j*lda] += TempA * TempB[7]; - - - - } - - } - } - } - if(coreid == 1) - { - for ( j = 1; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - TempA = A[j*lda + k]; - for( n = 0; n < 2; n++) - { - - - - TempB[0] = B[k*lda+8+16*n]; - TempB[1] = B[k*lda+9+16*n]; - TempB[2] = B[k*lda+10+16*n]; - TempB[3] = B[k*lda+11+16*n]; - TempB[4] = B[k*lda+12+16*n]; - TempB[5] = B[k*lda+13+16*n]; - TempB[6] = B[k*lda+14+16*n]; - TempB[7] = B[k*lda+15+16*n]; - - C[8+16*n+j*lda] += TempA * TempB[0]; - 
C[9+16*n+j*lda] += TempA * TempB[1]; - C[10+16*n+j*lda] += TempA * TempB[2]; - C[11+16*n+j*lda] += TempA * TempB[3]; - C[12+16*n+j*lda] += TempA * TempB[4]; - C[13+16*n+j*lda] += TempA * TempB[5]; - C[14+16*n+j*lda] += TempA * TempB[6]; - C[15+16*n+j*lda] += TempA * TempB[7]; - - TempB[0] = B[k*lda+0+16*n]; - TempB[1] = B[k*lda+1+16*n]; - TempB[2] = B[k*lda+2+16*n]; - TempB[3] = B[k*lda+3+16*n]; - TempB[4] = B[k*lda+4+16*n]; - TempB[5] = B[k*lda+5+16*n]; - TempB[6] = B[k*lda+6+16*n]; - TempB[7] = B[k*lda+7+16*n]; - - - - C[0+16*n+j*lda] += TempA * TempB[0]; - C[1+16*n+j*lda] += TempA * TempB[1]; - C[2+16*n+j*lda] += TempA * TempB[2]; - C[3+16*n+j*lda] += TempA * TempB[3]; - C[4+16*n+j*lda] += TempA * TempB[4]; - C[5+16*n+j*lda] += TempA * TempB[5]; - C[6+16*n+j*lda] += TempA * TempB[6]; - C[7+16*n+j*lda] += TempA * TempB[7]; - - - } - - } - } - } - */ - - //----------------------------------------------------------------version 2.11 optmize j,use core 1 j from 0 to 15 MSI 98k i = j*lda - //----------------------------------------------------------------version 2.12 not use i = j *lda MSI 95k - /* - static __thread data_t TempA[8]; - static __thread data_t TempB[8]; - static __thread int j,m,n,i,k; - - if(coreid == 1) - { - for ( j = 16; j < 32; j++ ) - { - - for ( m = 0; m < 4; m++ ) - { - - TempA[0] = A[j*lda+0+8*m]; - TempA[1] = A[j*lda+1+8*m]; - TempA[2] = A[j*lda+2+8*m]; - TempA[3] = A[j*lda+3+8*m]; - TempA[4] = A[j*lda+4+8*m]; - TempA[5] = A[j*lda+5+8*m]; - TempA[6] = A[j*lda+6+8*m]; - TempA[7] = A[j*lda+7+8*m]; - - for( n = 0; n < 4; n++) - { - - TempB[0] = B[(0+8*m)*lda+0+8*n]; - TempB[1] = B[(0+8*m)*lda+1+8*n]; - TempB[2] = B[(0+8*m)*lda+2+8*n]; - TempB[3] = B[(0+8*m)*lda+3+8*n]; - TempB[4] = B[(0+8*m)*lda+4+8*n]; - TempB[5] = B[(0+8*m)*lda+5+8*n]; - TempB[6] = B[(0+8*m)*lda+6+8*n]; - TempB[7] = B[(0+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[0] * TempB[0]; - C[1+8*n+j*lda] += TempA[0] * TempB[1]; - C[2+8*n+j*lda] += TempA[0] * TempB[2]; - C[3+8*n+j*lda] 
+= TempA[0] * TempB[3]; - C[4+8*n+j*lda] += TempA[0] * TempB[4]; - C[5+8*n+j*lda] += TempA[0] * TempB[5]; - C[6+8*n+j*lda] += TempA[0] * TempB[6]; - C[7+8*n+j*lda] += TempA[0] * TempB[7]; - - - - TempB[0] = B[(1+8*m)*lda+0+8*n]; - TempB[1] = B[(1+8*m)*lda+1+8*n]; - TempB[2] = B[(1+8*m)*lda+2+8*n]; - TempB[3] = B[(1+8*m)*lda+3+8*n]; - TempB[4] = B[(1+8*m)*lda+4+8*n]; - TempB[5] = B[(1+8*m)*lda+5+8*n]; - TempB[6] = B[(1+8*m)*lda+6+8*n]; - TempB[7] = B[(1+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[1] * TempB[0]; - C[1+8*n+j*lda] += TempA[1] * TempB[1]; - C[2+8*n+j*lda] += TempA[1] * TempB[2]; - C[3+8*n+j*lda] += TempA[1] * TempB[3]; - C[4+8*n+j*lda] += TempA[1] * TempB[4]; - C[5+8*n+j*lda] += TempA[1] * TempB[5]; - C[6+8*n+j*lda] += TempA[1] * TempB[6]; - C[7+8*n+j*lda] += TempA[1] * TempB[7]; - - - - TempB[0] = B[(2+8*m)*lda+0+8*n]; - TempB[1] = B[(2+8*m)*lda+1+8*n]; - TempB[2] = B[(2+8*m)*lda+2+8*n]; - TempB[3] = B[(2+8*m)*lda+3+8*n]; - TempB[4] = B[(2+8*m)*lda+4+8*n]; - TempB[5] = B[(2+8*m)*lda+5+8*n]; - TempB[6] = B[(2+8*m)*lda+6+8*n]; - TempB[7] = B[(2+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[2] * TempB[0]; - C[1+8*n+j*lda] += TempA[2] * TempB[1]; - C[2+8*n+j*lda] += TempA[2] * TempB[2]; - C[3+8*n+j*lda] += TempA[2] * TempB[3]; - C[4+8*n+j*lda] += TempA[2] * TempB[4]; - C[5+8*n+j*lda] += TempA[2] * TempB[5]; - C[6+8*n+j*lda] += TempA[2] * TempB[6]; - C[7+8*n+j*lda] += TempA[2] * TempB[7]; - - - - TempB[0] = B[(3+8*m)*lda+0+8*n]; - TempB[1] = B[(3+8*m)*lda+1+8*n]; - TempB[2] = B[(3+8*m)*lda+2+8*n]; - TempB[3] = B[(3+8*m)*lda+3+8*n]; - TempB[4] = B[(3+8*m)*lda+4+8*n]; - TempB[5] = B[(3+8*m)*lda+5+8*n]; - TempB[6] = B[(3+8*m)*lda+6+8*n]; - TempB[7] = B[(3+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[3] * TempB[0]; - C[1+8*n+j*lda] += TempA[3] * TempB[1]; - C[2+8*n+j*lda] += TempA[3] * TempB[2]; - C[3+8*n+j*lda] += TempA[3] * TempB[3]; - C[4+8*n+j*lda] += TempA[3] * TempB[4]; - C[5+8*n+j*lda] += TempA[3] * TempB[5]; - C[6+8*n+j*lda] += TempA[3] * TempB[6]; - 
C[7+8*n+j*lda] += TempA[3] * TempB[7]; - - - TempB[0] = B[(4+8*m)*lda+0+8*n]; - TempB[1] = B[(4+8*m)*lda+1+8*n]; - TempB[2] = B[(4+8*m)*lda+2+8*n]; - TempB[3] = B[(4+8*m)*lda+3+8*n]; - TempB[4] = B[(4+8*m)*lda+4+8*n]; - TempB[5] = B[(4+8*m)*lda+5+8*n]; - TempB[6] = B[(4+8*m)*lda+6+8*n]; - TempB[7] = B[(4+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[4] * TempB[0]; - C[1+8*n+j*lda] += TempA[4] * TempB[1]; - C[2+8*n+j*lda] += TempA[4] * TempB[2]; - C[3+8*n+j*lda] += TempA[4] * TempB[3]; - C[4+8*n+j*lda] += TempA[4] * TempB[4]; - C[5+8*n+j*lda] += TempA[4] * TempB[5]; - C[6+8*n+j*lda] += TempA[4] * TempB[6]; - C[7+8*n+j*lda] += TempA[4] * TempB[7]; - - - - TempB[0] = B[(5+8*m)*lda+0+8*n]; - TempB[1] = B[(5+8*m)*lda+1+8*n]; - TempB[2] = B[(5+8*m)*lda+2+8*n]; - TempB[3] = B[(5+8*m)*lda+3+8*n]; - TempB[4] = B[(5+8*m)*lda+4+8*n]; - TempB[5] = B[(5+8*m)*lda+5+8*n]; - TempB[6] = B[(5+8*m)*lda+6+8*n]; - TempB[7] = B[(5+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[5] * TempB[0]; - C[1+8*n+j*lda] += TempA[5] * TempB[1]; - C[2+8*n+j*lda] += TempA[5] * TempB[2]; - C[3+8*n+j*lda] += TempA[5] * TempB[3]; - C[4+8*n+j*lda] += TempA[5] * TempB[4]; - C[5+8*n+j*lda] += TempA[5] * TempB[5]; - C[6+8*n+j*lda] += TempA[5] * TempB[6]; - C[7+8*n+j*lda] += TempA[5] * TempB[7]; - - - - TempB[0] = B[(6+8*m)*lda+0+8*n]; - TempB[1] = B[(6+8*m)*lda+1+8*n]; - TempB[2] = B[(6+8*m)*lda+2+8*n]; - TempB[3] = B[(6+8*m)*lda+3+8*n]; - TempB[4] = B[(6+8*m)*lda+4+8*n]; - TempB[5] = B[(6+8*m)*lda+5+8*n]; - TempB[6] = B[(6+8*m)*lda+6+8*n]; - TempB[7] = B[(6+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[6] * TempB[0]; - C[1+8*n+j*lda] += TempA[6] * TempB[1]; - C[2+8*n+j*lda] += TempA[6] * TempB[2]; - C[3+8*n+j*lda] += TempA[6] * TempB[3]; - C[4+8*n+j*lda] += TempA[6] * TempB[4]; - C[5+8*n+j*lda] += TempA[6] * TempB[5]; - C[6+8*n+j*lda] += TempA[6] * TempB[6]; - C[7+8*n+j*lda] += TempA[6] * TempB[7]; - - - TempB[0] = B[(7+8*m)*lda+0+8*n]; - TempB[1] = B[(7+8*m)*lda+1+8*n]; - TempB[2] = B[(7+8*m)*lda+2+8*n]; - 
TempB[3] = B[(7+8*m)*lda+3+8*n]; - TempB[4] = B[(7+8*m)*lda+4+8*n]; - TempB[5] = B[(7+8*m)*lda+5+8*n]; - TempB[6] = B[(7+8*m)*lda+6+8*n]; - TempB[7] = B[(7+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[7] * TempB[0]; - C[1+8*n+j*lda] += TempA[7] * TempB[1]; - C[2+8*n+j*lda] += TempA[7] * TempB[2]; - C[3+8*n+j*lda] += TempA[7] * TempB[3]; - C[4+8*n+j*lda] += TempA[7] * TempB[4]; - C[5+8*n+j*lda] += TempA[7] * TempB[5]; - C[6+8*n+j*lda] += TempA[7] * TempB[6]; - C[7+8*n+j*lda] += TempA[7] * TempB[7]; - } - - } - } - } - if(coreid ==0) - { - for ( j = 0; j < 16; j++ ) - { - - for ( m = 0; m < 4; m++ ) - { - - TempA[0] = A[j*lda+0+8*m]; - TempA[1] = A[j*lda+1+8*m]; - TempA[2] = A[j*lda+2+8*m]; - TempA[3] = A[j*lda+3+8*m]; - TempA[4] = A[j*lda+4+8*m]; - TempA[5] = A[j*lda+5+8*m]; - TempA[6] = A[j*lda+6+8*m]; - TempA[7] = A[j*lda+7+8*m]; - - for( n = 0; n < 4; n++) - { - - TempB[0] = B[(0+8*m)*lda+0+8*n]; - TempB[1] = B[(0+8*m)*lda+1+8*n]; - TempB[2] = B[(0+8*m)*lda+2+8*n]; - TempB[3] = B[(0+8*m)*lda+3+8*n]; - TempB[4] = B[(0+8*m)*lda+4+8*n]; - TempB[5] = B[(0+8*m)*lda+5+8*n]; - TempB[6] = B[(0+8*m)*lda+6+8*n]; - TempB[7] = B[(0+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[0] * TempB[0]; - C[1+8*n+j*lda] += TempA[0] * TempB[1]; - C[2+8*n+j*lda] += TempA[0] * TempB[2]; - C[3+8*n+j*lda] += TempA[0] * TempB[3]; - C[4+8*n+j*lda] += TempA[0] * TempB[4]; - C[5+8*n+j*lda] += TempA[0] * TempB[5]; - C[6+8*n+j*lda] += TempA[0] * TempB[6]; - C[7+8*n+j*lda] += TempA[0] * TempB[7]; - - - - TempB[0] = B[(1+8*m)*lda+0+8*n]; - TempB[1] = B[(1+8*m)*lda+1+8*n]; - TempB[2] = B[(1+8*m)*lda+2+8*n]; - TempB[3] = B[(1+8*m)*lda+3+8*n]; - TempB[4] = B[(1+8*m)*lda+4+8*n]; - TempB[5] = B[(1+8*m)*lda+5+8*n]; - TempB[6] = B[(1+8*m)*lda+6+8*n]; - TempB[7] = B[(1+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[1] * TempB[0]; - C[1+8*n+j*lda] += TempA[1] * TempB[1]; - C[2+8*n+j*lda] += TempA[1] * TempB[2]; - C[3+8*n+j*lda] += TempA[1] * TempB[3]; - C[4+8*n+j*lda] += TempA[1] * TempB[4]; - C[5+8*n+j*lda] += 
TempA[1] * TempB[5]; - C[6+8*n+j*lda] += TempA[1] * TempB[6]; - C[7+8*n+j*lda] += TempA[1] * TempB[7]; - - - - TempB[0] = B[(2+8*m)*lda+0+8*n]; - TempB[1] = B[(2+8*m)*lda+1+8*n]; - TempB[2] = B[(2+8*m)*lda+2+8*n]; - TempB[3] = B[(2+8*m)*lda+3+8*n]; - TempB[4] = B[(2+8*m)*lda+4+8*n]; - TempB[5] = B[(2+8*m)*lda+5+8*n]; - TempB[6] = B[(2+8*m)*lda+6+8*n]; - TempB[7] = B[(2+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[2] * TempB[0]; - C[1+8*n+j*lda] += TempA[2] * TempB[1]; - C[2+8*n+j*lda] += TempA[2] * TempB[2]; - C[3+8*n+j*lda] += TempA[2] * TempB[3]; - C[4+8*n+j*lda] += TempA[2] * TempB[4]; - C[5+8*n+j*lda] += TempA[2] * TempB[5]; - C[6+8*n+j*lda] += TempA[2] * TempB[6]; - C[7+8*n+j*lda] += TempA[2] * TempB[7]; - - - - TempB[0] = B[(3+8*m)*lda+0+8*n]; - TempB[1] = B[(3+8*m)*lda+1+8*n]; - TempB[2] = B[(3+8*m)*lda+2+8*n]; - TempB[3] = B[(3+8*m)*lda+3+8*n]; - TempB[4] = B[(3+8*m)*lda+4+8*n]; - TempB[5] = B[(3+8*m)*lda+5+8*n]; - TempB[6] = B[(3+8*m)*lda+6+8*n]; - TempB[7] = B[(3+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[3] * TempB[0]; - C[1+8*n+j*lda] += TempA[3] * TempB[1]; - C[2+8*n+j*lda] += TempA[3] * TempB[2]; - C[3+8*n+j*lda] += TempA[3] * TempB[3]; - C[4+8*n+j*lda] += TempA[3] * TempB[4]; - C[5+8*n+j*lda] += TempA[3] * TempB[5]; - C[6+8*n+j*lda] += TempA[3] * TempB[6]; - C[7+8*n+j*lda] += TempA[3] * TempB[7]; - - - TempB[0] = B[(4+8*m)*lda+0+8*n]; - TempB[1] = B[(4+8*m)*lda+1+8*n]; - TempB[2] = B[(4+8*m)*lda+2+8*n]; - TempB[3] = B[(4+8*m)*lda+3+8*n]; - TempB[4] = B[(4+8*m)*lda+4+8*n]; - TempB[5] = B[(4+8*m)*lda+5+8*n]; - TempB[6] = B[(4+8*m)*lda+6+8*n]; - TempB[7] = B[(4+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[4] * TempB[0]; - C[1+8*n+j*lda] += TempA[4] * TempB[1]; - C[2+8*n+j*lda] += TempA[4] * TempB[2]; - C[3+8*n+j*lda] += TempA[4] * TempB[3]; - C[4+8*n+j*lda] += TempA[4] * TempB[4]; - C[5+8*n+j*lda] += TempA[4] * TempB[5]; - C[6+8*n+j*lda] += TempA[4] * TempB[6]; - C[7+8*n+j*lda] += TempA[4] * TempB[7]; - - - - TempB[0] = B[(5+8*m)*lda+0+8*n]; - 
TempB[1] = B[(5+8*m)*lda+1+8*n]; - TempB[2] = B[(5+8*m)*lda+2+8*n]; - TempB[3] = B[(5+8*m)*lda+3+8*n]; - TempB[4] = B[(5+8*m)*lda+4+8*n]; - TempB[5] = B[(5+8*m)*lda+5+8*n]; - TempB[6] = B[(5+8*m)*lda+6+8*n]; - TempB[7] = B[(5+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[5] * TempB[0]; - C[1+8*n+j*lda] += TempA[5] * TempB[1]; - C[2+8*n+j*lda] += TempA[5] * TempB[2]; - C[3+8*n+j*lda] += TempA[5] * TempB[3]; - C[4+8*n+j*lda] += TempA[5] * TempB[4]; - C[5+8*n+j*lda] += TempA[5] * TempB[5]; - C[6+8*n+j*lda] += TempA[5] * TempB[6]; - C[7+8*n+j*lda] += TempA[5] * TempB[7]; - - - - TempB[0] = B[(6+8*m)*lda+0+8*n]; - TempB[1] = B[(6+8*m)*lda+1+8*n]; - TempB[2] = B[(6+8*m)*lda+2+8*n]; - TempB[3] = B[(6+8*m)*lda+3+8*n]; - TempB[4] = B[(6+8*m)*lda+4+8*n]; - TempB[5] = B[(6+8*m)*lda+5+8*n]; - TempB[6] = B[(6+8*m)*lda+6+8*n]; - TempB[7] = B[(6+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[6] * TempB[0]; - C[1+8*n+j*lda] += TempA[6] * TempB[1]; - C[2+8*n+j*lda] += TempA[6] * TempB[2]; - C[3+8*n+j*lda] += TempA[6] * TempB[3]; - C[4+8*n+j*lda] += TempA[6] * TempB[4]; - C[5+8*n+j*lda] += TempA[6] * TempB[5]; - C[6+8*n+j*lda] += TempA[6] * TempB[6]; - C[7+8*n+j*lda] += TempA[6] * TempB[7]; - - - TempB[0] = B[(7+8*m)*lda+0+8*n]; - TempB[1] = B[(7+8*m)*lda+1+8*n]; - TempB[2] = B[(7+8*m)*lda+2+8*n]; - TempB[3] = B[(7+8*m)*lda+3+8*n]; - TempB[4] = B[(7+8*m)*lda+4+8*n]; - TempB[5] = B[(7+8*m)*lda+5+8*n]; - TempB[6] = B[(7+8*m)*lda+6+8*n]; - TempB[7] = B[(7+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[7] * TempB[0]; - C[1+8*n+j*lda] += TempA[7] * TempB[1]; - C[2+8*n+j*lda] += TempA[7] * TempB[2]; - C[3+8*n+j*lda] += TempA[7] * TempB[3]; - C[4+8*n+j*lda] += TempA[7] * TempB[4]; - C[5+8*n+j*lda] += TempA[7] * TempB[5]; - C[6+8*n+j*lda] += TempA[7] * TempB[6]; - C[7+8*n+j*lda] += TempA[7] * TempB[7]; - } - - } - } - } - */ - //-----------------------------------------------------------------version 2.14 optimize C. 
when tempc[8] inside n loop, MSI, 98K MI,158k - //-----------------------------------------------------------------version 2.15 optimize v2.14 a little MSI 89k. MI, 161K. don't decare tempc[8]=0 in the loop - /* - static __thread data_t TempA[8]; - static __thread data_t TempB[8]; - static __thread data_t TempC[8]; - static __thread int j,m,n,i,k; - - if(coreid == 1) - { - for ( j = 16; j < 32; j++ ) - { - - for ( m = 0; m < 4; m++ ) - { - - TempA[0] = A[j*lda+0+8*m]; - TempA[1] = A[j*lda+1+8*m]; - TempA[2] = A[j*lda+2+8*m]; - TempA[3] = A[j*lda+3+8*m]; - TempA[4] = A[j*lda+4+8*m]; - TempA[5] = A[j*lda+5+8*m]; - TempA[6] = A[j*lda+6+8*m]; - TempA[7] = A[j*lda+7+8*m]; - - - - for( n = 0; n < 4; n++) - { - - - TempB[0] = B[(0+8*m)*lda+0+8*n]; - TempB[1] = B[(0+8*m)*lda+1+8*n]; - TempB[2] = B[(0+8*m)*lda+2+8*n]; - TempB[3] = B[(0+8*m)*lda+3+8*n]; - TempB[4] = B[(0+8*m)*lda+4+8*n]; - TempB[5] = B[(0+8*m)*lda+5+8*n]; - TempB[6] = B[(0+8*m)*lda+6+8*n]; - TempB[7] = B[(0+8*m)*lda+7+8*n]; - - - TempC[0] = TempA[0] * TempB[0]; - TempC[1] = TempA[0] * TempB[1]; - TempC[2] = TempA[0] * TempB[2]; - TempC[3] = TempA[0] * TempB[3]; - TempC[4] = TempA[0] * TempB[4]; - TempC[5] = TempA[0] * TempB[5]; - TempC[6] = TempA[0] * TempB[6]; - TempC[7] = TempA[0] * TempB[7]; - - - - TempB[0] = B[(1+8*m)*lda+0+8*n]; - TempB[1] = B[(1+8*m)*lda+1+8*n]; - TempB[2] = B[(1+8*m)*lda+2+8*n]; - TempB[3] = B[(1+8*m)*lda+3+8*n]; - TempB[4] = B[(1+8*m)*lda+4+8*n]; - TempB[5] = B[(1+8*m)*lda+5+8*n]; - TempB[6] = B[(1+8*m)*lda+6+8*n]; - TempB[7] = B[(1+8*m)*lda+7+8*n]; - - TempC[0] += TempA[1] * TempB[0]; - TempC[1] += TempA[1] * TempB[1]; - TempC[2] += TempA[1] * TempB[2]; - TempC[3] += TempA[1] * TempB[3]; - TempC[4] += TempA[1] * TempB[4]; - TempC[5] += TempA[1] * TempB[5]; - TempC[6] += TempA[1] * TempB[6]; - TempC[7] += TempA[1] * TempB[7]; - - - - TempB[0] = B[(2+8*m)*lda+0+8*n]; - TempB[1] = B[(2+8*m)*lda+1+8*n]; - TempB[2] = B[(2+8*m)*lda+2+8*n]; - TempB[3] = B[(2+8*m)*lda+3+8*n]; - TempB[4] = 
B[(2+8*m)*lda+4+8*n]; - TempB[5] = B[(2+8*m)*lda+5+8*n]; - TempB[6] = B[(2+8*m)*lda+6+8*n]; - TempB[7] = B[(2+8*m)*lda+7+8*n]; - - TempC[0] += TempA[2] * TempB[0]; - TempC[1] += TempA[2] * TempB[1]; - TempC[2] += TempA[2] * TempB[2]; - TempC[3] += TempA[2] * TempB[3]; - TempC[4] += TempA[2] * TempB[4]; - TempC[5] += TempA[2] * TempB[5]; - TempC[6] += TempA[2] * TempB[6]; - TempC[7] += TempA[2] * TempB[7]; - - - - TempB[0] = B[(3+8*m)*lda+0+8*n]; - TempB[1] = B[(3+8*m)*lda+1+8*n]; - TempB[2] = B[(3+8*m)*lda+2+8*n]; - TempB[3] = B[(3+8*m)*lda+3+8*n]; - TempB[4] = B[(3+8*m)*lda+4+8*n]; - TempB[5] = B[(3+8*m)*lda+5+8*n]; - TempB[6] = B[(3+8*m)*lda+6+8*n]; - TempB[7] = B[(3+8*m)*lda+7+8*n]; - - TempC[0] += TempA[3] * TempB[0]; - TempC[1] += TempA[3] * TempB[1]; - TempC[2] += TempA[3] * TempB[2]; - TempC[3] += TempA[3] * TempB[3]; - TempC[4] += TempA[3] * TempB[4]; - TempC[5] += TempA[3] * TempB[5]; - TempC[6] += TempA[3] * TempB[6]; - TempC[7] += TempA[3] * TempB[7]; - - - TempB[0] = B[(4+8*m)*lda+0+8*n]; - TempB[1] = B[(4+8*m)*lda+1+8*n]; - TempB[2] = B[(4+8*m)*lda+2+8*n]; - TempB[3] = B[(4+8*m)*lda+3+8*n]; - TempB[4] = B[(4+8*m)*lda+4+8*n]; - TempB[5] = B[(4+8*m)*lda+5+8*n]; - TempB[6] = B[(4+8*m)*lda+6+8*n]; - TempB[7] = B[(4+8*m)*lda+7+8*n]; - - TempC[0] += TempA[4] * TempB[0]; - TempC[1] += TempA[4] * TempB[1]; - TempC[2] += TempA[4] * TempB[2]; - TempC[3] += TempA[4] * TempB[3]; - TempC[4] += TempA[4] * TempB[4]; - TempC[5] += TempA[4] * TempB[5]; - TempC[6] += TempA[4] * TempB[6]; - TempC[7] += TempA[4] * TempB[7]; - - - - TempB[0] = B[(5+8*m)*lda+0+8*n]; - TempB[1] = B[(5+8*m)*lda+1+8*n]; - TempB[2] = B[(5+8*m)*lda+2+8*n]; - TempB[3] = B[(5+8*m)*lda+3+8*n]; - TempB[4] = B[(5+8*m)*lda+4+8*n]; - TempB[5] = B[(5+8*m)*lda+5+8*n]; - TempB[6] = B[(5+8*m)*lda+6+8*n]; - TempB[7] = B[(5+8*m)*lda+7+8*n]; - - TempC[0] += TempA[5] * TempB[0]; - TempC[1] += TempA[5] * TempB[1]; - TempC[2] += TempA[5] * TempB[2]; - TempC[3] += TempA[5] * TempB[3]; - TempC[4] += TempA[5] * 
TempB[4]; - TempC[5] += TempA[5] * TempB[5]; - TempC[6] += TempA[5] * TempB[6]; - TempC[7] += TempA[5] * TempB[7]; - - - - TempB[0] = B[(6+8*m)*lda+0+8*n]; - TempB[1] = B[(6+8*m)*lda+1+8*n]; - TempB[2] = B[(6+8*m)*lda+2+8*n]; - TempB[3] = B[(6+8*m)*lda+3+8*n]; - TempB[4] = B[(6+8*m)*lda+4+8*n]; - TempB[5] = B[(6+8*m)*lda+5+8*n]; - TempB[6] = B[(6+8*m)*lda+6+8*n]; - TempB[7] = B[(6+8*m)*lda+7+8*n]; - - TempC[0] += TempA[6] * TempB[0]; - TempC[1] += TempA[6] * TempB[1]; - TempC[2] += TempA[6] * TempB[2]; - TempC[3] += TempA[6] * TempB[3]; - TempC[4] += TempA[6] * TempB[4]; - TempC[5] += TempA[6] * TempB[5]; - TempC[6] += TempA[6] * TempB[6]; - TempC[7] += TempA[6] * TempB[7]; - - - TempB[0] = B[(7+8*m)*lda+0+8*n]; - TempB[1] = B[(7+8*m)*lda+1+8*n]; - TempB[2] = B[(7+8*m)*lda+2+8*n]; - TempB[3] = B[(7+8*m)*lda+3+8*n]; - TempB[4] = B[(7+8*m)*lda+4+8*n]; - TempB[5] = B[(7+8*m)*lda+5+8*n]; - TempB[6] = B[(7+8*m)*lda+6+8*n]; - TempB[7] = B[(7+8*m)*lda+7+8*n]; - - TempC[0] += TempA[7] * TempB[0]; - TempC[1] += TempA[7] * TempB[1]; - TempC[2] += TempA[7] * TempB[2]; - TempC[3] += TempA[7] * TempB[3]; - TempC[4] += TempA[7] * TempB[4]; - TempC[5] += TempA[7] * TempB[5]; - TempC[6] += TempA[7] * TempB[6]; - TempC[7] += TempA[7] * TempB[7]; - - - - C[0+8*n+j*lda] += TempC[0]; - C[1+8*n+j*lda] += TempC[1]; - C[2+8*n+j*lda] += TempC[2]; - C[3+8*n+j*lda] += TempC[3]; - C[4+8*n+j*lda] += TempC[4]; - C[5+8*n+j*lda] += TempC[5]; - C[6+8*n+j*lda] += TempC[6]; - C[7+8*n+j*lda] += TempC[7]; - } - } - } - } - if(coreid == 0) - { - for ( j = 0; j < 16; j++ ) - { - - for ( m = 0; m < 4; m++ ) - { - - TempA[0] = A[j*lda+0+8*m]; - TempA[1] = A[j*lda+1+8*m]; - TempA[2] = A[j*lda+2+8*m]; - TempA[3] = A[j*lda+3+8*m]; - TempA[4] = A[j*lda+4+8*m]; - TempA[5] = A[j*lda+5+8*m]; - TempA[6] = A[j*lda+6+8*m]; - TempA[7] = A[j*lda+7+8*m]; - - for( n = 0; n < 4; n++) - { - - - TempB[0] = B[(0+8*m)*lda+0+8*n]; - TempB[1] = B[(0+8*m)*lda+1+8*n]; - TempB[2] = B[(0+8*m)*lda+2+8*n]; - TempB[3] = 
B[(0+8*m)*lda+3+8*n]; - TempB[4] = B[(0+8*m)*lda+4+8*n]; - TempB[5] = B[(0+8*m)*lda+5+8*n]; - TempB[6] = B[(0+8*m)*lda+6+8*n]; - TempB[7] = B[(0+8*m)*lda+7+8*n]; - - - TempC[0] = TempA[0] * TempB[0]; - TempC[1] = TempA[0] * TempB[1]; - TempC[2] = TempA[0] * TempB[2]; - TempC[3] = TempA[0] * TempB[3]; - TempC[4] = TempA[0] * TempB[4]; - TempC[5] = TempA[0] * TempB[5]; - TempC[6] = TempA[0] * TempB[6]; - TempC[7] = TempA[0] * TempB[7]; - - - - TempB[0] = B[(1+8*m)*lda+0+8*n]; - TempB[1] = B[(1+8*m)*lda+1+8*n]; - TempB[2] = B[(1+8*m)*lda+2+8*n]; - TempB[3] = B[(1+8*m)*lda+3+8*n]; - TempB[4] = B[(1+8*m)*lda+4+8*n]; - TempB[5] = B[(1+8*m)*lda+5+8*n]; - TempB[6] = B[(1+8*m)*lda+6+8*n]; - TempB[7] = B[(1+8*m)*lda+7+8*n]; - - TempC[0] += TempA[1] * TempB[0]; - TempC[1] += TempA[1] * TempB[1]; - TempC[2] += TempA[1] * TempB[2]; - TempC[3] += TempA[1] * TempB[3]; - TempC[4] += TempA[1] * TempB[4]; - TempC[5] += TempA[1] * TempB[5]; - TempC[6] += TempA[1] * TempB[6]; - TempC[7] += TempA[1] * TempB[7]; - - - - TempB[0] = B[(2+8*m)*lda+0+8*n]; - TempB[1] = B[(2+8*m)*lda+1+8*n]; - TempB[2] = B[(2+8*m)*lda+2+8*n]; - TempB[3] = B[(2+8*m)*lda+3+8*n]; - TempB[4] = B[(2+8*m)*lda+4+8*n]; - TempB[5] = B[(2+8*m)*lda+5+8*n]; - TempB[6] = B[(2+8*m)*lda+6+8*n]; - TempB[7] = B[(2+8*m)*lda+7+8*n]; - - TempC[0] += TempA[2] * TempB[0]; - TempC[1] += TempA[2] * TempB[1]; - TempC[2] += TempA[2] * TempB[2]; - TempC[3] += TempA[2] * TempB[3]; - TempC[4] += TempA[2] * TempB[4]; - TempC[5] += TempA[2] * TempB[5]; - TempC[6] += TempA[2] * TempB[6]; - TempC[7] += TempA[2] * TempB[7]; - - - - TempB[0] = B[(3+8*m)*lda+0+8*n]; - TempB[1] = B[(3+8*m)*lda+1+8*n]; - TempB[2] = B[(3+8*m)*lda+2+8*n]; - TempB[3] = B[(3+8*m)*lda+3+8*n]; - TempB[4] = B[(3+8*m)*lda+4+8*n]; - TempB[5] = B[(3+8*m)*lda+5+8*n]; - TempB[6] = B[(3+8*m)*lda+6+8*n]; - TempB[7] = B[(3+8*m)*lda+7+8*n]; - - TempC[0] += TempA[3] * TempB[0]; - TempC[1] += TempA[3] * TempB[1]; - TempC[2] += TempA[3] * TempB[2]; - TempC[3] += TempA[3] * 
TempB[3]; - TempC[4] += TempA[3] * TempB[4]; - TempC[5] += TempA[3] * TempB[5]; - TempC[6] += TempA[3] * TempB[6]; - TempC[7] += TempA[3] * TempB[7]; - - - TempB[0] = B[(4+8*m)*lda+0+8*n]; - TempB[1] = B[(4+8*m)*lda+1+8*n]; - TempB[2] = B[(4+8*m)*lda+2+8*n]; - TempB[3] = B[(4+8*m)*lda+3+8*n]; - TempB[4] = B[(4+8*m)*lda+4+8*n]; - TempB[5] = B[(4+8*m)*lda+5+8*n]; - TempB[6] = B[(4+8*m)*lda+6+8*n]; - TempB[7] = B[(4+8*m)*lda+7+8*n]; - - TempC[0] += TempA[4] * TempB[0]; - TempC[1] += TempA[4] * TempB[1]; - TempC[2] += TempA[4] * TempB[2]; - TempC[3] += TempA[4] * TempB[3]; - TempC[4] += TempA[4] * TempB[4]; - TempC[5] += TempA[4] * TempB[5]; - TempC[6] += TempA[4] * TempB[6]; - TempC[7] += TempA[4] * TempB[7]; - - - - TempB[0] = B[(5+8*m)*lda+0+8*n]; - TempB[1] = B[(5+8*m)*lda+1+8*n]; - TempB[2] = B[(5+8*m)*lda+2+8*n]; - TempB[3] = B[(5+8*m)*lda+3+8*n]; - TempB[4] = B[(5+8*m)*lda+4+8*n]; - TempB[5] = B[(5+8*m)*lda+5+8*n]; - TempB[6] = B[(5+8*m)*lda+6+8*n]; - TempB[7] = B[(5+8*m)*lda+7+8*n]; - - TempC[0] += TempA[5] * TempB[0]; - TempC[1] += TempA[5] * TempB[1]; - TempC[2] += TempA[5] * TempB[2]; - TempC[3] += TempA[5] * TempB[3]; - TempC[4] += TempA[5] * TempB[4]; - TempC[5] += TempA[5] * TempB[5]; - TempC[6] += TempA[5] * TempB[6]; - TempC[7] += TempA[5] * TempB[7]; - - - - TempB[0] = B[(6+8*m)*lda+0+8*n]; - TempB[1] = B[(6+8*m)*lda+1+8*n]; - TempB[2] = B[(6+8*m)*lda+2+8*n]; - TempB[3] = B[(6+8*m)*lda+3+8*n]; - TempB[4] = B[(6+8*m)*lda+4+8*n]; - TempB[5] = B[(6+8*m)*lda+5+8*n]; - TempB[6] = B[(6+8*m)*lda+6+8*n]; - TempB[7] = B[(6+8*m)*lda+7+8*n]; - - TempC[0] += TempA[6] * TempB[0]; - TempC[1] += TempA[6] * TempB[1]; - TempC[2] += TempA[6] * TempB[2]; - TempC[3] += TempA[6] * TempB[3]; - TempC[4] += TempA[6] * TempB[4]; - TempC[5] += TempA[6] * TempB[5]; - TempC[6] += TempA[6] * TempB[6]; - TempC[7] += TempA[6] * TempB[7]; - - - TempB[0] = B[(7+8*m)*lda+0+8*n]; - TempB[1] = B[(7+8*m)*lda+1+8*n]; - TempB[2] = B[(7+8*m)*lda+2+8*n]; - TempB[3] = B[(7+8*m)*lda+3+8*n]; - 
TempB[4] = B[(7+8*m)*lda+4+8*n]; - TempB[5] = B[(7+8*m)*lda+5+8*n]; - TempB[6] = B[(7+8*m)*lda+6+8*n]; - TempB[7] = B[(7+8*m)*lda+7+8*n]; - - TempC[0] += TempA[7] * TempB[0]; - TempC[1] += TempA[7] * TempB[1]; - TempC[2] += TempA[7] * TempB[2]; - TempC[3] += TempA[7] * TempB[3]; - TempC[4] += TempA[7] * TempB[4]; - TempC[5] += TempA[7] * TempB[5]; - TempC[6] += TempA[7] * TempB[6]; - TempC[7] += TempA[7] * TempB[7]; - - C[0+8*n+j*lda] += TempC[0]; - C[1+8*n+j*lda] += TempC[1]; - C[2+8*n+j*lda] += TempC[2]; - C[3+8*n+j*lda] += TempC[3]; - C[4+8*n+j*lda] += TempC[4]; - C[5+8*n+j*lda] += TempC[5]; - C[6+8*n+j*lda] += TempC[6]; - C[7+8*n+j*lda] += TempC[7]; - } - - } - } - } - */ - //-----------------------------------------------------------------version 2.16, optimize v2.15 get rid of tempb. MSI 83K.w/ test one 81K. - - - static __thread data_t TempA[8]; - static __thread data_t TempB[8]; - static __thread data_t TempC[8]; - static __thread int j,m,n; - - if(coreid == 1) - { - for ( j = 16; j < 32; j++ ) - { - - for ( m = 0; m < 4; m++ ) - { - - TempA[0] = A[j*lda+0+8*m]; - TempA[1] = A[j*lda+1+8*m]; - TempA[2] = A[j*lda+2+8*m]; - TempA[3] = A[j*lda+3+8*m]; - TempA[4] = A[j*lda+4+8*m]; - TempA[5] = A[j*lda+5+8*m]; - TempA[6] = A[j*lda+6+8*m]; - TempA[7] = A[j*lda+7+8*m]; - - - - for( n = 0; n < 4; n++) - { - - - - - - TempC[0] = TempA[0] * B[(0+8*m)*lda+0+8*n]; - TempC[1] = TempA[0] * B[(0+8*m)*lda+1+8*n]; - TempC[2] = TempA[0] * B[(0+8*m)*lda+2+8*n]; - TempC[3] = TempA[0] * B[(0+8*m)*lda+3+8*n]; - TempC[4] = TempA[0] * B[(0+8*m)*lda+4+8*n]; - TempC[5] = TempA[0] * B[(0+8*m)*lda+5+8*n]; - TempC[6] = TempA[0] * B[(0+8*m)*lda+6+8*n]; - TempC[7] = TempA[0] * B[(0+8*m)*lda+7+8*n]; - - - TempC[0] += TempA[1] * B[(1+8*m)*lda+0+8*n]; - TempC[1] += TempA[1] * B[(1+8*m)*lda+1+8*n]; - TempC[2] += TempA[1] * B[(1+8*m)*lda+2+8*n]; - TempC[3] += TempA[1] * B[(1+8*m)*lda+3+8*n]; - TempC[4] += TempA[1] * B[(1+8*m)*lda+4+8*n]; - TempC[5] += TempA[1] * B[(1+8*m)*lda+5+8*n]; - 
TempC[6] += TempA[1] * B[(1+8*m)*lda+6+8*n]; - TempC[7] += TempA[1] * B[(1+8*m)*lda+7+8*n]; - - - - TempC[0] += TempA[2] * B[(2+8*m)*lda+0+8*n]; - TempC[1] += TempA[2] * B[(2+8*m)*lda+1+8*n]; - TempC[2] += TempA[2] * B[(2+8*m)*lda+2+8*n]; - TempC[3] += TempA[2] * B[(2+8*m)*lda+3+8*n]; - TempC[4] += TempA[2] * B[(2+8*m)*lda+4+8*n]; - TempC[5] += TempA[2] * B[(2+8*m)*lda+5+8*n]; - TempC[6] += TempA[2] * B[(2+8*m)*lda+6+8*n]; - TempC[7] += TempA[2] * B[(2+8*m)*lda+7+8*n]; - - - - TempC[0] += TempA[3] * B[(3+8*m)*lda+0+8*n]; - TempC[1] += TempA[3] * B[(3+8*m)*lda+1+8*n]; - TempC[2] += TempA[3] * B[(3+8*m)*lda+2+8*n]; - TempC[3] += TempA[3] * B[(3+8*m)*lda+3+8*n]; - TempC[4] += TempA[3] * B[(3+8*m)*lda+4+8*n]; - TempC[5] += TempA[3] * B[(3+8*m)*lda+5+8*n]; - TempC[6] += TempA[3] * B[(3+8*m)*lda+6+8*n]; - TempC[7] += TempA[3] * B[(3+8*m)*lda+7+8*n]; - - TempC[0] += TempA[4] * B[(4+8*m)*lda+0+8*n]; - TempC[1] += TempA[4] * B[(4+8*m)*lda+1+8*n]; - TempC[2] += TempA[4] * B[(4+8*m)*lda+2+8*n]; - TempC[3] += TempA[4] * B[(4+8*m)*lda+3+8*n]; - TempC[4] += TempA[4] * B[(4+8*m)*lda+4+8*n]; - TempC[5] += TempA[4] * B[(4+8*m)*lda+5+8*n]; - TempC[6] += TempA[4] * B[(4+8*m)*lda+6+8*n]; - TempC[7] += TempA[4] * B[(4+8*m)*lda+7+8*n]; - - - TempC[0] += TempA[5] * B[(5+8*m)*lda+0+8*n]; - TempC[1] += TempA[5] * B[(5+8*m)*lda+1+8*n]; - TempC[2] += TempA[5] * B[(5+8*m)*lda+2+8*n]; - TempC[3] += TempA[5] * B[(5+8*m)*lda+3+8*n]; - TempC[4] += TempA[5] * B[(5+8*m)*lda+4+8*n]; - TempC[5] += TempA[5] * B[(5+8*m)*lda+5+8*n]; - TempC[6] += TempA[5] * B[(5+8*m)*lda+6+8*n]; - TempC[7] += TempA[5] * B[(5+8*m)*lda+7+8*n]; - - - - TempC[0] += TempA[6] * B[(6+8*m)*lda+0+8*n]; - TempC[1] += TempA[6] * B[(6+8*m)*lda+1+8*n]; - TempC[2] += TempA[6] * B[(6+8*m)*lda+2+8*n]; - TempC[3] += TempA[6] * B[(6+8*m)*lda+3+8*n]; - TempC[4] += TempA[6] * B[(6+8*m)*lda+4+8*n]; - TempC[5] += TempA[6] * B[(6+8*m)*lda+5+8*n]; - TempC[6] += TempA[6] * B[(6+8*m)*lda+6+8*n]; - TempC[7] += TempA[6] * B[(6+8*m)*lda+7+8*n]; - - 
- TempC[0] += TempA[7] * B[(7+8*m)*lda+0+8*n]; - TempC[1] += TempA[7] * B[(7+8*m)*lda+1+8*n]; - TempC[2] += TempA[7] * B[(7+8*m)*lda+2+8*n]; - TempC[3] += TempA[7] * B[(7+8*m)*lda+3+8*n]; - TempC[4] += TempA[7] * B[(7+8*m)*lda+4+8*n]; - TempC[5] += TempA[7] * B[(7+8*m)*lda+5+8*n]; - TempC[6] += TempA[7] * B[(7+8*m)*lda+6+8*n]; - TempC[7] += TempA[7] * B[(7+8*m)*lda+7+8*n]; - - - - C[0+8*n+j*lda] += TempC[0]; - C[1+8*n+j*lda] += TempC[1]; - C[2+8*n+j*lda] += TempC[2]; - C[3+8*n+j*lda] += TempC[3]; - C[4+8*n+j*lda] += TempC[4]; - C[5+8*n+j*lda] += TempC[5]; - C[6+8*n+j*lda] += TempC[6]; - C[7+8*n+j*lda] += TempC[7]; - } - } - } - } - if(coreid == 0) - { - for ( j = 0; j < 16; j++ ) - { - - for ( m = 0; m < 4; m++ ) - { - - TempA[0] = A[j*lda+0+8*m]; - TempA[1] = A[j*lda+1+8*m]; - TempA[2] = A[j*lda+2+8*m]; - TempA[3] = A[j*lda+3+8*m]; - TempA[4] = A[j*lda+4+8*m]; - TempA[5] = A[j*lda+5+8*m]; - TempA[6] = A[j*lda+6+8*m]; - TempA[7] = A[j*lda+7+8*m]; - - - - for( n = 0; n < 4; n++) - { - - - - - - TempC[0] = TempA[0] * B[(0+8*m)*lda+0+8*n]; - TempC[1] = TempA[0] * B[(0+8*m)*lda+1+8*n]; - TempC[2] = TempA[0] * B[(0+8*m)*lda+2+8*n]; - TempC[3] = TempA[0] * B[(0+8*m)*lda+3+8*n]; - TempC[4] = TempA[0] * B[(0+8*m)*lda+4+8*n]; - TempC[5] = TempA[0] * B[(0+8*m)*lda+5+8*n]; - TempC[6] = TempA[0] * B[(0+8*m)*lda+6+8*n]; - TempC[7] = TempA[0] * B[(0+8*m)*lda+7+8*n]; - - - TempC[0] += TempA[1] * B[(1+8*m)*lda+0+8*n]; - TempC[1] += TempA[1] * B[(1+8*m)*lda+1+8*n]; - TempC[2] += TempA[1] * B[(1+8*m)*lda+2+8*n]; - TempC[3] += TempA[1] * B[(1+8*m)*lda+3+8*n]; - TempC[4] += TempA[1] * B[(1+8*m)*lda+4+8*n]; - TempC[5] += TempA[1] * B[(1+8*m)*lda+5+8*n]; - TempC[6] += TempA[1] * B[(1+8*m)*lda+6+8*n]; - TempC[7] += TempA[1] * B[(1+8*m)*lda+7+8*n]; - - - - TempC[0] += TempA[2] * B[(2+8*m)*lda+0+8*n]; - TempC[1] += TempA[2] * B[(2+8*m)*lda+1+8*n]; - TempC[2] += TempA[2] * B[(2+8*m)*lda+2+8*n]; - TempC[3] += TempA[2] * B[(2+8*m)*lda+3+8*n]; - TempC[4] += TempA[2] * B[(2+8*m)*lda+4+8*n]; - 
TempC[5] += TempA[2] * B[(2+8*m)*lda+5+8*n]; - TempC[6] += TempA[2] * B[(2+8*m)*lda+6+8*n]; - TempC[7] += TempA[2] * B[(2+8*m)*lda+7+8*n]; - - - - TempC[0] += TempA[3] * B[(3+8*m)*lda+0+8*n]; - TempC[1] += TempA[3] * B[(3+8*m)*lda+1+8*n]; - TempC[2] += TempA[3] * B[(3+8*m)*lda+2+8*n]; - TempC[3] += TempA[3] * B[(3+8*m)*lda+3+8*n]; - TempC[4] += TempA[3] * B[(3+8*m)*lda+4+8*n]; - TempC[5] += TempA[3] * B[(3+8*m)*lda+5+8*n]; - TempC[6] += TempA[3] * B[(3+8*m)*lda+6+8*n]; - TempC[7] += TempA[3] * B[(3+8*m)*lda+7+8*n]; - - TempC[0] += TempA[4] * B[(4+8*m)*lda+0+8*n]; - TempC[1] += TempA[4] * B[(4+8*m)*lda+1+8*n]; - TempC[2] += TempA[4] * B[(4+8*m)*lda+2+8*n]; - TempC[3] += TempA[4] * B[(4+8*m)*lda+3+8*n]; - TempC[4] += TempA[4] * B[(4+8*m)*lda+4+8*n]; - TempC[5] += TempA[4] * B[(4+8*m)*lda+5+8*n]; - TempC[6] += TempA[4] * B[(4+8*m)*lda+6+8*n]; - TempC[7] += TempA[4] * B[(4+8*m)*lda+7+8*n]; - - - TempC[0] += TempA[5] * B[(5+8*m)*lda+0+8*n]; - TempC[1] += TempA[5] * B[(5+8*m)*lda+1+8*n]; - TempC[2] += TempA[5] * B[(5+8*m)*lda+2+8*n]; - TempC[3] += TempA[5] * B[(5+8*m)*lda+3+8*n]; - TempC[4] += TempA[5] * B[(5+8*m)*lda+4+8*n]; - TempC[5] += TempA[5] * B[(5+8*m)*lda+5+8*n]; - TempC[6] += TempA[5] * B[(5+8*m)*lda+6+8*n]; - TempC[7] += TempA[5] * B[(5+8*m)*lda+7+8*n]; - - - - TempC[0] += TempA[6] * B[(6+8*m)*lda+0+8*n]; - TempC[1] += TempA[6] * B[(6+8*m)*lda+1+8*n]; - TempC[2] += TempA[6] * B[(6+8*m)*lda+2+8*n]; - TempC[3] += TempA[6] * B[(6+8*m)*lda+3+8*n]; - TempC[4] += TempA[6] * B[(6+8*m)*lda+4+8*n]; - TempC[5] += TempA[6] * B[(6+8*m)*lda+5+8*n]; - TempC[6] += TempA[6] * B[(6+8*m)*lda+6+8*n]; - TempC[7] += TempA[6] * B[(6+8*m)*lda+7+8*n]; - - - TempC[0] += TempA[7] * B[(7+8*m)*lda+0+8*n]; - TempC[1] += TempA[7] * B[(7+8*m)*lda+1+8*n]; - TempC[2] += TempA[7] * B[(7+8*m)*lda+2+8*n]; - TempC[3] += TempA[7] * B[(7+8*m)*lda+3+8*n]; - TempC[4] += TempA[7] * B[(7+8*m)*lda+4+8*n]; - TempC[5] += TempA[7] * B[(7+8*m)*lda+5+8*n]; - TempC[6] += TempA[7] * B[(7+8*m)*lda+6+8*n]; - 
TempC[7] += TempA[7] * B[(7+8*m)*lda+7+8*n]; - - - - C[0+8*n+j*lda] += TempC[0]; - C[1+8*n+j*lda] += TempC[1]; - C[2+8*n+j*lda] += TempC[2]; - C[3+8*n+j*lda] += TempC[3]; - C[4+8*n+j*lda] += TempC[4]; - C[5+8*n+j*lda] += TempC[5]; - C[6+8*n+j*lda] += TempC[6]; - C[7+8*n+j*lda] += TempC[7]; - } - } - } - } - - //-----------------------------------------------------------------version 2.13 optimize j - /* - static __thread data_t TempA[8]; - static __thread data_t TempB[8]; - static __thread data_t TempC[8]; - static __thread int j,m,n,i,k; - - if(coreid == 1) - { - for ( j = 16; j < 32; j++ ) - { - - for ( m = 0; m < 4; m++ ) - { - - TempA[0] = A[j*lda+0+8*m]; - TempA[1] = A[j*lda+1+8*m]; - TempA[2] = A[j*lda+2+8*m]; - TempA[3] = A[j*lda+3+8*m]; - TempA[4] = A[j*lda+4+8*m]; - TempA[5] = A[j*lda+5+8*m]; - TempA[6] = A[j*lda+6+8*m]; - TempA[7] = A[j*lda+7+8*m]; - - for( n = 0; n < 4; n++) - { - TempB[0] = B[(0+8*m)*lda+0+8*n]; - TempB[1] = B[(0+8*m)*lda+1+8*n]; - TempB[2] = B[(0+8*m)*lda+2+8*n]; - TempB[3] = B[(0+8*m)*lda+3+8*n]; - TempB[4] = B[(0+8*m)*lda+4+8*n]; - TempB[5] = B[(0+8*m)*lda+5+8*n]; - TempB[6] = B[(0+8*m)*lda+6+8*n]; - TempB[7] = B[(0+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[0] * TempB[0]; - C[1+8*n+j*lda] += TempA[0] * TempB[1]; - C[2+8*n+j*lda] += TempA[0] * TempB[2]; - C[3+8*n+j*lda] += TempA[0] * TempB[3]; - C[4+8*n+j*lda] += TempA[0] * TempB[4]; - C[5+8*n+j*lda] += TempA[0] * TempB[5]; - C[6+8*n+j*lda] += TempA[0] * TempB[6]; - C[7+8*n+j*lda] += TempA[0] * TempB[7]; - - - - TempB[0] = B[(1+8*m)*lda+0+8*n]; - TempB[1] = B[(1+8*m)*lda+1+8*n]; - TempB[2] = B[(1+8*m)*lda+2+8*n]; - TempB[3] = B[(1+8*m)*lda+3+8*n]; - TempB[4] = B[(1+8*m)*lda+4+8*n]; - TempB[5] = B[(1+8*m)*lda+5+8*n]; - TempB[6] = B[(1+8*m)*lda+6+8*n]; - TempB[7] = B[(1+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[1] * TempB[0]; - C[1+8*n+j*lda] += TempA[1] * TempB[1]; - C[2+8*n+j*lda] += TempA[1] * TempB[2]; - C[3+8*n+j*lda] += TempA[1] * TempB[3]; - C[4+8*n+j*lda] += TempA[1] * 
TempB[4]; - C[5+8*n+j*lda] += TempA[1] * TempB[5]; - C[6+8*n+j*lda] += TempA[1] * TempB[6]; - C[7+8*n+j*lda] += TempA[1] * TempB[7]; - - - - TempB[0] = B[(2+8*m)*lda+0+8*n]; - TempB[1] = B[(2+8*m)*lda+1+8*n]; - TempB[2] = B[(2+8*m)*lda+2+8*n]; - TempB[3] = B[(2+8*m)*lda+3+8*n]; - TempB[4] = B[(2+8*m)*lda+4+8*n]; - TempB[5] = B[(2+8*m)*lda+5+8*n]; - TempB[6] = B[(2+8*m)*lda+6+8*n]; - TempB[7] = B[(2+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[2] * TempB[0]; - C[1+8*n+j*lda] += TempA[2] * TempB[1]; - C[2+8*n+j*lda] += TempA[2] * TempB[2]; - C[3+8*n+j*lda] += TempA[2] * TempB[3]; - C[4+8*n+j*lda] += TempA[2] * TempB[4]; - C[5+8*n+j*lda] += TempA[2] * TempB[5]; - C[6+8*n+j*lda] += TempA[2] * TempB[6]; - C[7+8*n+j*lda] += TempA[2] * TempB[7]; - - - - TempB[0] = B[(3+8*m)*lda+0+8*n]; - TempB[1] = B[(3+8*m)*lda+1+8*n]; - TempB[2] = B[(3+8*m)*lda+2+8*n]; - TempB[3] = B[(3+8*m)*lda+3+8*n]; - TempB[4] = B[(3+8*m)*lda+4+8*n]; - TempB[5] = B[(3+8*m)*lda+5+8*n]; - TempB[6] = B[(3+8*m)*lda+6+8*n]; - TempB[7] = B[(3+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[3] * TempB[0]; - C[1+8*n+j*lda] += TempA[3] * TempB[1]; - C[2+8*n+j*lda] += TempA[3] * TempB[2]; - C[3+8*n+j*lda] += TempA[3] * TempB[3]; - C[4+8*n+j*lda] += TempA[3] * TempB[4]; - C[5+8*n+j*lda] += TempA[3] * TempB[5]; - C[6+8*n+j*lda] += TempA[3] * TempB[6]; - C[7+8*n+j*lda] += TempA[3] * TempB[7]; - - - TempB[0] = B[(4+8*m)*lda+0+8*n]; - TempB[1] = B[(4+8*m)*lda+1+8*n]; - TempB[2] = B[(4+8*m)*lda+2+8*n]; - TempB[3] = B[(4+8*m)*lda+3+8*n]; - TempB[4] = B[(4+8*m)*lda+4+8*n]; - TempB[5] = B[(4+8*m)*lda+5+8*n]; - TempB[6] = B[(4+8*m)*lda+6+8*n]; - TempB[7] = B[(4+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[4] * TempB[0]; - C[1+8*n+j*lda] += TempA[4] * TempB[1]; - C[2+8*n+j*lda] += TempA[4] * TempB[2]; - C[3+8*n+j*lda] += TempA[4] * TempB[3]; - C[4+8*n+j*lda] += TempA[4] * TempB[4]; - C[5+8*n+j*lda] += TempA[4] * TempB[5]; - C[6+8*n+j*lda] += TempA[4] * TempB[6]; - C[7+8*n+j*lda] += TempA[4] * TempB[7]; - - - - TempB[0] = 
B[(5+8*m)*lda+0+8*n]; - TempB[1] = B[(5+8*m)*lda+1+8*n]; - TempB[2] = B[(5+8*m)*lda+2+8*n]; - TempB[3] = B[(5+8*m)*lda+3+8*n]; - TempB[4] = B[(5+8*m)*lda+4+8*n]; - TempB[5] = B[(5+8*m)*lda+5+8*n]; - TempB[6] = B[(5+8*m)*lda+6+8*n]; - TempB[7] = B[(5+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[5] * TempB[0]; - C[1+8*n+j*lda] += TempA[5] * TempB[1]; - C[2+8*n+j*lda] += TempA[5] * TempB[2]; - C[3+8*n+j*lda] += TempA[5] * TempB[3]; - C[4+8*n+j*lda] += TempA[5] * TempB[4]; - C[5+8*n+j*lda] += TempA[5] * TempB[5]; - C[6+8*n+j*lda] += TempA[5] * TempB[6]; - C[7+8*n+j*lda] += TempA[5] * TempB[7]; - - - - TempB[0] = B[(6+8*m)*lda+0+8*n]; - TempB[1] = B[(6+8*m)*lda+1+8*n]; - TempB[2] = B[(6+8*m)*lda+2+8*n]; - TempB[3] = B[(6+8*m)*lda+3+8*n]; - TempB[4] = B[(6+8*m)*lda+4+8*n]; - TempB[5] = B[(6+8*m)*lda+5+8*n]; - TempB[6] = B[(6+8*m)*lda+6+8*n]; - TempB[7] = B[(6+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[6] * TempB[0]; - C[1+8*n+j*lda] += TempA[6] * TempB[1]; - C[2+8*n+j*lda] += TempA[6] * TempB[2]; - C[3+8*n+j*lda] += TempA[6] * TempB[3]; - C[4+8*n+j*lda] += TempA[6] * TempB[4]; - C[5+8*n+j*lda] += TempA[6] * TempB[5]; - C[6+8*n+j*lda] += TempA[6] * TempB[6]; - C[7+8*n+j*lda] += TempA[6] * TempB[7]; - - - TempB[0] = B[(7+8*m)*lda+0+8*n]; - TempB[1] = B[(7+8*m)*lda+1+8*n]; - TempB[2] = B[(7+8*m)*lda+2+8*n]; - TempB[3] = B[(7+8*m)*lda+3+8*n]; - TempB[4] = B[(7+8*m)*lda+4+8*n]; - TempB[5] = B[(7+8*m)*lda+5+8*n]; - TempB[6] = B[(7+8*m)*lda+6+8*n]; - TempB[7] = B[(7+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[7] * TempB[0]; - C[1+8*n+j*lda] += TempA[7] * TempB[1]; - C[2+8*n+j*lda] += TempA[7] * TempB[2]; - C[3+8*n+j*lda] += TempA[7] * TempB[3]; - C[4+8*n+j*lda] += TempA[7] * TempB[4]; - C[5+8*n+j*lda] += TempA[7] * TempB[5]; - C[6+8*n+j*lda] += TempA[7] * TempB[6]; - C[7+8*n+j*lda] += TempA[7] * TempB[7]; - } - - } - } - } - if(coreid == 0) - { - for ( j = 0; j < 16; j++ ) - { - - for ( m = 0; m < 4; m++ ) - { - - TempA[0] = A[j*lda+0+8*m]; - TempA[1] = A[j*lda+1+8*m]; - 
TempA[2] = A[j*lda+2+8*m]; - TempA[3] = A[j*lda+3+8*m]; - TempA[4] = A[j*lda+4+8*m]; - TempA[5] = A[j*lda+5+8*m]; - TempA[6] = A[j*lda+6+8*m]; - TempA[7] = A[j*lda+7+8*m]; - - for( n = 0; n < 4; n++) - { - TempB[0] = B[(0+8*m)*lda+0+8*n]; - TempB[1] = B[(0+8*m)*lda+1+8*n]; - TempB[2] = B[(0+8*m)*lda+2+8*n]; - TempB[3] = B[(0+8*m)*lda+3+8*n]; - TempB[4] = B[(0+8*m)*lda+4+8*n]; - TempB[5] = B[(0+8*m)*lda+5+8*n]; - TempB[6] = B[(0+8*m)*lda+6+8*n]; - TempB[7] = B[(0+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[0] * TempB[0]; - C[1+8*n+j*lda] += TempA[0] * TempB[1]; - C[2+8*n+j*lda] += TempA[0] * TempB[2]; - C[3+8*n+j*lda] += TempA[0] * TempB[3]; - C[4+8*n+j*lda] += TempA[0] * TempB[4]; - C[5+8*n+j*lda] += TempA[0] * TempB[5]; - C[6+8*n+j*lda] += TempA[0] * TempB[6]; - C[7+8*n+j*lda] += TempA[0] * TempB[7]; - - - - TempB[0] = B[(1+8*m)*lda+0+8*n]; - TempB[1] = B[(1+8*m)*lda+1+8*n]; - TempB[2] = B[(1+8*m)*lda+2+8*n]; - TempB[3] = B[(1+8*m)*lda+3+8*n]; - TempB[4] = B[(1+8*m)*lda+4+8*n]; - TempB[5] = B[(1+8*m)*lda+5+8*n]; - TempB[6] = B[(1+8*m)*lda+6+8*n]; - TempB[7] = B[(1+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[1] * TempB[0]; - C[1+8*n+j*lda] += TempA[1] * TempB[1]; - C[2+8*n+j*lda] += TempA[1] * TempB[2]; - C[3+8*n+j*lda] += TempA[1] * TempB[3]; - C[4+8*n+j*lda] += TempA[1] * TempB[4]; - C[5+8*n+j*lda] += TempA[1] * TempB[5]; - C[6+8*n+j*lda] += TempA[1] * TempB[6]; - C[7+8*n+j*lda] += TempA[1] * TempB[7]; - - - - TempB[0] = B[(2+8*m)*lda+0+8*n]; - TempB[1] = B[(2+8*m)*lda+1+8*n]; - TempB[2] = B[(2+8*m)*lda+2+8*n]; - TempB[3] = B[(2+8*m)*lda+3+8*n]; - TempB[4] = B[(2+8*m)*lda+4+8*n]; - TempB[5] = B[(2+8*m)*lda+5+8*n]; - TempB[6] = B[(2+8*m)*lda+6+8*n]; - TempB[7] = B[(2+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[2] * TempB[0]; - C[1+8*n+j*lda] += TempA[2] * TempB[1]; - C[2+8*n+j*lda] += TempA[2] * TempB[2]; - C[3+8*n+j*lda] += TempA[2] * TempB[3]; - C[4+8*n+j*lda] += TempA[2] * TempB[4]; - C[5+8*n+j*lda] += TempA[2] * TempB[5]; - C[6+8*n+j*lda] += TempA[2] * 
TempB[6]; - C[7+8*n+j*lda] += TempA[2] * TempB[7]; - - - - TempB[0] = B[(3+8*m)*lda+0+8*n]; - TempB[1] = B[(3+8*m)*lda+1+8*n]; - TempB[2] = B[(3+8*m)*lda+2+8*n]; - TempB[3] = B[(3+8*m)*lda+3+8*n]; - TempB[4] = B[(3+8*m)*lda+4+8*n]; - TempB[5] = B[(3+8*m)*lda+5+8*n]; - TempB[6] = B[(3+8*m)*lda+6+8*n]; - TempB[7] = B[(3+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[3] * TempB[0]; - C[1+8*n+j*lda] += TempA[3] * TempB[1]; - C[2+8*n+j*lda] += TempA[3] * TempB[2]; - C[3+8*n+j*lda] += TempA[3] * TempB[3]; - C[4+8*n+j*lda] += TempA[3] * TempB[4]; - C[5+8*n+j*lda] += TempA[3] * TempB[5]; - C[6+8*n+j*lda] += TempA[3] * TempB[6]; - C[7+8*n+j*lda] += TempA[3] * TempB[7]; - - - TempB[0] = B[(4+8*m)*lda+0+8*n]; - TempB[1] = B[(4+8*m)*lda+1+8*n]; - TempB[2] = B[(4+8*m)*lda+2+8*n]; - TempB[3] = B[(4+8*m)*lda+3+8*n]; - TempB[4] = B[(4+8*m)*lda+4+8*n]; - TempB[5] = B[(4+8*m)*lda+5+8*n]; - TempB[6] = B[(4+8*m)*lda+6+8*n]; - TempB[7] = B[(4+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[4] * TempB[0]; - C[1+8*n+j*lda] += TempA[4] * TempB[1]; - C[2+8*n+j*lda] += TempA[4] * TempB[2]; - C[3+8*n+j*lda] += TempA[4] * TempB[3]; - C[4+8*n+j*lda] += TempA[4] * TempB[4]; - C[5+8*n+j*lda] += TempA[4] * TempB[5]; - C[6+8*n+j*lda] += TempA[4] * TempB[6]; - C[7+8*n+j*lda] += TempA[4] * TempB[7]; - - - - TempB[0] = B[(5+8*m)*lda+0+8*n]; - TempB[1] = B[(5+8*m)*lda+1+8*n]; - TempB[2] = B[(5+8*m)*lda+2+8*n]; - TempB[3] = B[(5+8*m)*lda+3+8*n]; - TempB[4] = B[(5+8*m)*lda+4+8*n]; - TempB[5] = B[(5+8*m)*lda+5+8*n]; - TempB[6] = B[(5+8*m)*lda+6+8*n]; - TempB[7] = B[(5+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[5] * TempB[0]; - C[1+8*n+j*lda] += TempA[5] * TempB[1]; - C[2+8*n+j*lda] += TempA[5] * TempB[2]; - C[3+8*n+j*lda] += TempA[5] * TempB[3]; - C[4+8*n+j*lda] += TempA[5] * TempB[4]; - C[5+8*n+j*lda] += TempA[5] * TempB[5]; - C[6+8*n+j*lda] += TempA[5] * TempB[6]; - C[7+8*n+j*lda] += TempA[5] * TempB[7]; - - - - TempB[0] = B[(6+8*m)*lda+0+8*n]; - TempB[1] = B[(6+8*m)*lda+1+8*n]; - TempB[2] = 
B[(6+8*m)*lda+2+8*n]; - TempB[3] = B[(6+8*m)*lda+3+8*n]; - TempB[4] = B[(6+8*m)*lda+4+8*n]; - TempB[5] = B[(6+8*m)*lda+5+8*n]; - TempB[6] = B[(6+8*m)*lda+6+8*n]; - TempB[7] = B[(6+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[6] * TempB[0]; - C[1+8*n+j*lda] += TempA[6] * TempB[1]; - C[2+8*n+j*lda] += TempA[6] * TempB[2]; - C[3+8*n+j*lda] += TempA[6] * TempB[3]; - C[4+8*n+j*lda] += TempA[6] * TempB[4]; - C[5+8*n+j*lda] += TempA[6] * TempB[5]; - C[6+8*n+j*lda] += TempA[6] * TempB[6]; - C[7+8*n+j*lda] += TempA[6] * TempB[7]; - - - TempB[0] = B[(7+8*m)*lda+0+8*n]; - TempB[1] = B[(7+8*m)*lda+1+8*n]; - TempB[2] = B[(7+8*m)*lda+2+8*n]; - TempB[3] = B[(7+8*m)*lda+3+8*n]; - TempB[4] = B[(7+8*m)*lda+4+8*n]; - TempB[5] = B[(7+8*m)*lda+5+8*n]; - TempB[6] = B[(7+8*m)*lda+6+8*n]; - TempB[7] = B[(7+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[7] * TempB[0]; - C[1+8*n+j*lda] += TempA[7] * TempB[1]; - C[2+8*n+j*lda] += TempA[7] * TempB[2]; - C[3+8*n+j*lda] += TempA[7] * TempB[3]; - C[4+8*n+j*lda] += TempA[7] * TempB[4]; - C[5+8*n+j*lda] += TempA[7] * TempB[5]; - C[6+8*n+j*lda] += TempA[7] * TempB[6]; - C[7+8*n+j*lda] += TempA[7] * TempB[7]; - } - - } - } - } - */ -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/av_matmul/dataset.h b/mt/av_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/av_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 
3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 
0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 
0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 
68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 
61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/av_matmul/matmul_gendata.pl b/mt/av_matmul/matmul_gendata.pl deleted file mode 100755 index 
f21bb46..0000000 --- a/mt/av_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print 
sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/av_matmul/matmul_mi.c b/mt/av_matmul/matmul_mi.c deleted file mode 100644 index b0ce08a..0000000 --- a/mt/av_matmul/matmul_mi.c +++ /dev/null @@ -1,2209 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - 
for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. 
- - //-------------------------------------------------------------first working version best 500k - /* - static __thread int i, j, k; - if(coreid == 0) - { - for ( j = 0; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - for ( i = 0; i < lda; i++) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - } - } - - if(coreid ==1) - { - for ( j = 1; j < lda; j+=2 ) - { - for ( k = 0;k < lda; k++) - { - for ( i = 0; i < lda; i++) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - - } - } - } - } - */ - //-------------------------------------------------------------version1.1, take read out of inner loop,300k - /* - static __thread int i, j, k; - static __thread data_t TempA; - - if(coreid == 0) - { - for ( j = 0; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - TempA = A[j*lda + k]; - for ( i = 0; i < lda; i++) - { - C[i + j*lda] += TempA* B[k*lda + i]; - } - } - } - } - - if(coreid ==1) - { - for ( j = 1; j < lda; j+=2 ) - { - for ( k = 0;k < lda; k++) - { - TempA = A[j*lda + k]; - for ( i = 0; i < lda; i++) - { - C[i + j*lda] += TempA* B[k*lda + i]; - } - } - } - } - */ - //-------------------------------------------------------------version2.0, read 8 elements in B at one time. 
140k mi, MSI117.0k - /* - static __thread int i, j, k, m, n; - static __thread data_t TempA; - static __thread data_t TempB[8]; - - if(coreid == 0) - { - for ( j = 0; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - TempA = A[j*lda + k]; - for( n = 0; n < 4; n++) - { - - TempB[0] = B[k*lda+0+8*n]; - TempB[1] = B[k*lda+1+8*n]; - TempB[2] = B[k*lda+2+8*n]; - TempB[3] = B[k*lda+3+8*n]; - TempB[4] = B[k*lda+4+8*n]; - TempB[5] = B[k*lda+5+8*n]; - TempB[6] = B[k*lda+6+8*n]; - TempB[7] = B[k*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA * TempB[0]; - C[1+8*n+j*lda] += TempA * TempB[1]; - C[2+8*n+j*lda] += TempA * TempB[2]; - C[3+8*n+j*lda] += TempA * TempB[3]; - C[4+8*n+j*lda] += TempA * TempB[4]; - C[5+8*n+j*lda] += TempA * TempB[5]; - C[6+8*n+j*lda] += TempA * TempB[6]; - C[7+8*n+j*lda] += TempA * TempB[7]; - - } - - } - } - } - - if(coreid == 1) - { - for ( j = 1; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - TempA = A[j*lda + k]; - for( n = 0; n < 4; n++) - { - - TempB[0] = B[k*lda+0+8*n]; - TempB[1] = B[k*lda+1+8*n]; - TempB[2] = B[k*lda+2+8*n]; - TempB[3] = B[k*lda+3+8*n]; - TempB[4] = B[k*lda+4+8*n]; - TempB[5] = B[k*lda+5+8*n]; - TempB[6] = B[k*lda+6+8*n]; - TempB[7] = B[k*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA * TempB[0]; - C[1+8*n+j*lda] += TempA * TempB[1]; - C[2+8*n+j*lda] += TempA * TempB[2]; - C[3+8*n+j*lda] += TempA * TempB[3]; - C[4+8*n+j*lda] += TempA * TempB[4]; - C[5+8*n+j*lda] += TempA * TempB[5]; - C[6+8*n+j*lda] += TempA * TempB[6]; - C[7+8*n+j*lda] += TempA * TempB[7]; - - } - - } - } - } - */ - - //-------------------------------------------------------------version2.1, optimize k. 700k. bad move to v2.2. - //-------------------------------------------------------------version2.9 take off all inner loops for both cores, MSI,109K. MI 182k - //-------------------------------------------------------------version2.10 use i= j*lda inside the n loop increase speed. but not out m and n. 
tried replace first 3, get 104.9k - /* - static __thread int j, m, i,n; - static __thread data_t TempA[8]; - static __thread data_t TempB[8]; - - if(coreid == 1) - { - for ( j = 1; j < lda; j+=2 ) - { - - for ( m = 0; m < 4; m++ ) - { - - TempA[0] = A[j*lda+0+8*m]; - TempA[1] = A[j*lda+1+8*m]; - TempA[2] = A[j*lda+2+8*m]; - TempA[3] = A[j*lda+3+8*m]; - TempA[4] = A[j*lda+4+8*m]; - TempA[5] = A[j*lda+5+8*m]; - TempA[6] = A[j*lda+6+8*m]; - TempA[7] = A[j*lda+7+8*m]; - - for( n = 0; n < 4; n++) - { - i = j*lda; - - TempB[0] = B[(0+8*m)*lda+0+8*n]; - TempB[1] = B[(0+8*m)*lda+1+8*n]; - TempB[2] = B[(0+8*m)*lda+2+8*n]; - TempB[3] = B[(0+8*m)*lda+3+8*n]; - TempB[4] = B[(0+8*m)*lda+4+8*n]; - TempB[5] = B[(0+8*m)*lda+5+8*n]; - TempB[6] = B[(0+8*m)*lda+6+8*n]; - TempB[7] = B[(0+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[0] * TempB[0]; - C[1+8*n+i] += TempA[0] * TempB[1]; - C[2+8*n+i] += TempA[0] * TempB[2]; - C[3+8*n+i] += TempA[0] * TempB[3]; - C[4+8*n+i] += TempA[0] * TempB[4]; - C[5+8*n+i] += TempA[0] * TempB[5]; - C[6+8*n+i] += TempA[0] * TempB[6]; - C[7+8*n+i] += TempA[0] * TempB[7]; - - - - TempB[0] = B[(1+8*m)*lda+0+8*n]; - TempB[1] = B[(1+8*m)*lda+1+8*n]; - TempB[2] = B[(1+8*m)*lda+2+8*n]; - TempB[3] = B[(1+8*m)*lda+3+8*n]; - TempB[4] = B[(1+8*m)*lda+4+8*n]; - TempB[5] = B[(1+8*m)*lda+5+8*n]; - TempB[6] = B[(1+8*m)*lda+6+8*n]; - TempB[7] = B[(1+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[1] * TempB[0]; - C[1+8*n+i] += TempA[1] * TempB[1]; - C[2+8*n+i] += TempA[1] * TempB[2]; - C[3+8*n+i] += TempA[1] * TempB[3]; - C[4+8*n+i] += TempA[1] * TempB[4]; - C[5+8*n+i] += TempA[1] * TempB[5]; - C[6+8*n+i] += TempA[1] * TempB[6]; - C[7+8*n+i] += TempA[1] * TempB[7]; - - - - TempB[0] = B[(2+8*m)*lda+0+8*n]; - TempB[1] = B[(2+8*m)*lda+1+8*n]; - TempB[2] = B[(2+8*m)*lda+2+8*n]; - TempB[3] = B[(2+8*m)*lda+3+8*n]; - TempB[4] = B[(2+8*m)*lda+4+8*n]; - TempB[5] = B[(2+8*m)*lda+5+8*n]; - TempB[6] = B[(2+8*m)*lda+6+8*n]; - TempB[7] = B[(2+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[2] * 
TempB[0]; - C[1+8*n+i] += TempA[2] * TempB[1]; - C[2+8*n+i] += TempA[2] * TempB[2]; - C[3+8*n+i] += TempA[2] * TempB[3]; - C[4+8*n+i] += TempA[2] * TempB[4]; - C[5+8*n+i] += TempA[2] * TempB[5]; - C[6+8*n+i] += TempA[2] * TempB[6]; - C[7+8*n+i] += TempA[2] * TempB[7]; - - - - TempB[0] = B[(3+8*m)*lda+0+8*n]; - TempB[1] = B[(3+8*m)*lda+1+8*n]; - TempB[2] = B[(3+8*m)*lda+2+8*n]; - TempB[3] = B[(3+8*m)*lda+3+8*n]; - TempB[4] = B[(3+8*m)*lda+4+8*n]; - TempB[5] = B[(3+8*m)*lda+5+8*n]; - TempB[6] = B[(3+8*m)*lda+6+8*n]; - TempB[7] = B[(3+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[3] * TempB[0]; - C[1+8*n+i] += TempA[3] * TempB[1]; - C[2+8*n+i] += TempA[3] * TempB[2]; - C[3+8*n+i] += TempA[3] * TempB[3]; - C[4+8*n+i] += TempA[3] * TempB[4]; - C[5+8*n+i] += TempA[3] * TempB[5]; - C[6+8*n+i] += TempA[3] * TempB[6]; - C[7+8*n+i] += TempA[3] * TempB[7]; - - - TempB[0] = B[(4+8*m)*lda+0+8*n]; - TempB[1] = B[(4+8*m)*lda+1+8*n]; - TempB[2] = B[(4+8*m)*lda+2+8*n]; - TempB[3] = B[(4+8*m)*lda+3+8*n]; - TempB[4] = B[(4+8*m)*lda+4+8*n]; - TempB[5] = B[(4+8*m)*lda+5+8*n]; - TempB[6] = B[(4+8*m)*lda+6+8*n]; - TempB[7] = B[(4+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[4] * TempB[0]; - C[1+8*n+i] += TempA[4] * TempB[1]; - C[2+8*n+i] += TempA[4] * TempB[2]; - C[3+8*n+i] += TempA[4] * TempB[3]; - C[4+8*n+i] += TempA[4] * TempB[4]; - C[5+8*n+i] += TempA[4] * TempB[5]; - C[6+8*n+i] += TempA[4] * TempB[6]; - C[7+8*n+i] += TempA[4] * TempB[7]; - - - - TempB[0] = B[(5+8*m)*lda+0+8*n]; - TempB[1] = B[(5+8*m)*lda+1+8*n]; - TempB[2] = B[(5+8*m)*lda+2+8*n]; - TempB[3] = B[(5+8*m)*lda+3+8*n]; - TempB[4] = B[(5+8*m)*lda+4+8*n]; - TempB[5] = B[(5+8*m)*lda+5+8*n]; - TempB[6] = B[(5+8*m)*lda+6+8*n]; - TempB[7] = B[(5+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[5] * TempB[0]; - C[1+8*n+i] += TempA[5] * TempB[1]; - C[2+8*n+i] += TempA[5] * TempB[2]; - C[3+8*n+i] += TempA[5] * TempB[3]; - C[4+8*n+i] += TempA[5] * TempB[4]; - C[5+8*n+i] += TempA[5] * TempB[5]; - C[6+8*n+i] += TempA[5] * TempB[6]; - C[7+8*n+i] += 
TempA[5] * TempB[7]; - - - - TempB[0] = B[(6+8*m)*lda+0+8*n]; - TempB[1] = B[(6+8*m)*lda+1+8*n]; - TempB[2] = B[(6+8*m)*lda+2+8*n]; - TempB[3] = B[(6+8*m)*lda+3+8*n]; - TempB[4] = B[(6+8*m)*lda+4+8*n]; - TempB[5] = B[(6+8*m)*lda+5+8*n]; - TempB[6] = B[(6+8*m)*lda+6+8*n]; - TempB[7] = B[(6+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[6] * TempB[0]; - C[1+8*n+i] += TempA[6] * TempB[1]; - C[2+8*n+i] += TempA[6] * TempB[2]; - C[3+8*n+i] += TempA[6] * TempB[3]; - C[4+8*n+i] += TempA[6] * TempB[4]; - C[5+8*n+i] += TempA[6] * TempB[5]; - C[6+8*n+i] += TempA[6] * TempB[6]; - C[7+8*n+i] += TempA[6] * TempB[7]; - - - TempB[0] = B[(7+8*m)*lda+0+8*n]; - TempB[1] = B[(7+8*m)*lda+1+8*n]; - TempB[2] = B[(7+8*m)*lda+2+8*n]; - TempB[3] = B[(7+8*m)*lda+3+8*n]; - TempB[4] = B[(7+8*m)*lda+4+8*n]; - TempB[5] = B[(7+8*m)*lda+5+8*n]; - TempB[6] = B[(7+8*m)*lda+6+8*n]; - TempB[7] = B[(7+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[7] * TempB[0]; - C[1+8*n+i] += TempA[7] * TempB[1]; - C[2+8*n+i] += TempA[7] * TempB[2]; - C[3+8*n+i] += TempA[7] * TempB[3]; - C[4+8*n+i] += TempA[7] * TempB[4]; - C[5+8*n+i] += TempA[7] * TempB[5]; - C[6+8*n+i] += TempA[7] * TempB[6]; - C[7+8*n+i] += TempA[7] * TempB[7]; - } - - } - } - } - if(coreid == 0) - { - for ( j = 0; j < lda; j+=2 ) - { - - for ( m = 0; m < 4; m++ ) - { - - TempA[0] = A[j*lda+0+8*m]; - TempA[1] = A[j*lda+1+8*m]; - TempA[2] = A[j*lda+2+8*m]; - TempA[3] = A[j*lda+3+8*m]; - TempA[4] = A[j*lda+4+8*m]; - TempA[5] = A[j*lda+5+8*m]; - TempA[6] = A[j*lda+6+8*m]; - TempA[7] = A[j*lda+7+8*m]; - - for( n = 0; n < 4; n++) - { - i = j*lda; - - TempB[0] = B[(0+8*m)*lda+0+8*n]; - TempB[1] = B[(0+8*m)*lda+1+8*n]; - TempB[2] = B[(0+8*m)*lda+2+8*n]; - TempB[3] = B[(0+8*m)*lda+3+8*n]; - TempB[4] = B[(0+8*m)*lda+4+8*n]; - TempB[5] = B[(0+8*m)*lda+5+8*n]; - TempB[6] = B[(0+8*m)*lda+6+8*n]; - TempB[7] = B[(0+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[0] * TempB[0]; - C[1+8*n+i] += TempA[0] * TempB[1]; - C[2+8*n+i] += TempA[0] * TempB[2]; - C[3+8*n+i] += TempA[0] * 
TempB[3]; - C[4+8*n+i] += TempA[0] * TempB[4]; - C[5+8*n+i] += TempA[0] * TempB[5]; - C[6+8*n+i] += TempA[0] * TempB[6]; - C[7+8*n+i] += TempA[0] * TempB[7]; - - - - TempB[0] = B[(1+8*m)*lda+0+8*n]; - TempB[1] = B[(1+8*m)*lda+1+8*n]; - TempB[2] = B[(1+8*m)*lda+2+8*n]; - TempB[3] = B[(1+8*m)*lda+3+8*n]; - TempB[4] = B[(1+8*m)*lda+4+8*n]; - TempB[5] = B[(1+8*m)*lda+5+8*n]; - TempB[6] = B[(1+8*m)*lda+6+8*n]; - TempB[7] = B[(1+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[1] * TempB[0]; - C[1+8*n+i] += TempA[1] * TempB[1]; - C[2+8*n+i] += TempA[1] * TempB[2]; - C[3+8*n+i] += TempA[1] * TempB[3]; - C[4+8*n+i] += TempA[1] * TempB[4]; - C[5+8*n+i] += TempA[1] * TempB[5]; - C[6+8*n+i] += TempA[1] * TempB[6]; - C[7+8*n+i] += TempA[1] * TempB[7]; - - - - TempB[0] = B[(2+8*m)*lda+0+8*n]; - TempB[1] = B[(2+8*m)*lda+1+8*n]; - TempB[2] = B[(2+8*m)*lda+2+8*n]; - TempB[3] = B[(2+8*m)*lda+3+8*n]; - TempB[4] = B[(2+8*m)*lda+4+8*n]; - TempB[5] = B[(2+8*m)*lda+5+8*n]; - TempB[6] = B[(2+8*m)*lda+6+8*n]; - TempB[7] = B[(2+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[2] * TempB[0]; - C[1+8*n+i] += TempA[2] * TempB[1]; - C[2+8*n+i] += TempA[2] * TempB[2]; - C[3+8*n+i] += TempA[2] * TempB[3]; - C[4+8*n+i] += TempA[2] * TempB[4]; - C[5+8*n+i] += TempA[2] * TempB[5]; - C[6+8*n+i] += TempA[2] * TempB[6]; - C[7+8*n+i] += TempA[2] * TempB[7]; - - - - TempB[0] = B[(3+8*m)*lda+0+8*n]; - TempB[1] = B[(3+8*m)*lda+1+8*n]; - TempB[2] = B[(3+8*m)*lda+2+8*n]; - TempB[3] = B[(3+8*m)*lda+3+8*n]; - TempB[4] = B[(3+8*m)*lda+4+8*n]; - TempB[5] = B[(3+8*m)*lda+5+8*n]; - TempB[6] = B[(3+8*m)*lda+6+8*n]; - TempB[7] = B[(3+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[3] * TempB[0]; - C[1+8*n+i] += TempA[3] * TempB[1]; - C[2+8*n+i] += TempA[3] * TempB[2]; - C[3+8*n+i] += TempA[3] * TempB[3]; - C[4+8*n+i] += TempA[3] * TempB[4]; - C[5+8*n+i] += TempA[3] * TempB[5]; - C[6+8*n+i] += TempA[3] * TempB[6]; - C[7+8*n+i] += TempA[3] * TempB[7]; - - - TempB[0] = B[(4+8*m)*lda+0+8*n]; - TempB[1] = B[(4+8*m)*lda+1+8*n]; - TempB[2] = 
B[(4+8*m)*lda+2+8*n]; - TempB[3] = B[(4+8*m)*lda+3+8*n]; - TempB[4] = B[(4+8*m)*lda+4+8*n]; - TempB[5] = B[(4+8*m)*lda+5+8*n]; - TempB[6] = B[(4+8*m)*lda+6+8*n]; - TempB[7] = B[(4+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[4] * TempB[0]; - C[1+8*n+i] += TempA[4] * TempB[1]; - C[2+8*n+i] += TempA[4] * TempB[2]; - C[3+8*n+i] += TempA[4] * TempB[3]; - C[4+8*n+i] += TempA[4] * TempB[4]; - C[5+8*n+i] += TempA[4] * TempB[5]; - C[6+8*n+i] += TempA[4] * TempB[6]; - C[7+8*n+i] += TempA[4] * TempB[7]; - - - - TempB[0] = B[(5+8*m)*lda+0+8*n]; - TempB[1] = B[(5+8*m)*lda+1+8*n]; - TempB[2] = B[(5+8*m)*lda+2+8*n]; - TempB[3] = B[(5+8*m)*lda+3+8*n]; - TempB[4] = B[(5+8*m)*lda+4+8*n]; - TempB[5] = B[(5+8*m)*lda+5+8*n]; - TempB[6] = B[(5+8*m)*lda+6+8*n]; - TempB[7] = B[(5+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[5] * TempB[0]; - C[1+8*n+i] += TempA[5] * TempB[1]; - C[2+8*n+i] += TempA[5] * TempB[2]; - C[3+8*n+i] += TempA[5] * TempB[3]; - C[4+8*n+i] += TempA[5] * TempB[4]; - C[5+8*n+i] += TempA[5] * TempB[5]; - C[6+8*n+i] += TempA[5] * TempB[6]; - C[7+8*n+i] += TempA[5] * TempB[7]; - - - - TempB[0] = B[(6+8*m)*lda+0+8*n]; - TempB[1] = B[(6+8*m)*lda+1+8*n]; - TempB[2] = B[(6+8*m)*lda+2+8*n]; - TempB[3] = B[(6+8*m)*lda+3+8*n]; - TempB[4] = B[(6+8*m)*lda+4+8*n]; - TempB[5] = B[(6+8*m)*lda+5+8*n]; - TempB[6] = B[(6+8*m)*lda+6+8*n]; - TempB[7] = B[(6+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[6] * TempB[0]; - C[1+8*n+i] += TempA[6] * TempB[1]; - C[2+8*n+i] += TempA[6] * TempB[2]; - C[3+8*n+i] += TempA[6] * TempB[3]; - C[4+8*n+i] += TempA[6] * TempB[4]; - C[5+8*n+i] += TempA[6] * TempB[5]; - C[6+8*n+i] += TempA[6] * TempB[6]; - C[7+8*n+i] += TempA[6] * TempB[7]; - - - TempB[0] = B[(7+8*m)*lda+0+8*n]; - TempB[1] = B[(7+8*m)*lda+1+8*n]; - TempB[2] = B[(7+8*m)*lda+2+8*n]; - TempB[3] = B[(7+8*m)*lda+3+8*n]; - TempB[4] = B[(7+8*m)*lda+4+8*n]; - TempB[5] = B[(7+8*m)*lda+5+8*n]; - TempB[6] = B[(7+8*m)*lda+6+8*n]; - TempB[7] = B[(7+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[7] * TempB[0]; - C[1+8*n+i] 
+= TempA[7] * TempB[1]; - C[2+8*n+i] += TempA[7] * TempB[2]; - C[3+8*n+i] += TempA[7] * TempB[3]; - C[4+8*n+i] += TempA[7] * TempB[4]; - C[5+8*n+i] += TempA[7] * TempB[5]; - C[6+8*n+i] += TempA[7] * TempB[6]; - C[7+8*n+i] += TempA[7] * TempB[7]; - } - - } - } - } - - */ - //-------------------------------------------------------------version2.2, optimize k. from 4 instead of 8 like v2.1, random failing on MI, unknown reason, MSI,350K, take off each inner loop for core 0 260k, both cores 134k - //-------------------------------------------------------------try false sharing for core 0, 136k. - /* - static __thread int j, m, n; - static __thread data_t TempA[4]; - static __thread data_t TempB[4]; - - if(coreid == 1) - { - for ( j = 1; j < lda; j+=2 ) - { - for ( m = 0; m < 8; m++ ) - { - TempA[0] = A[j*lda+0+4*m]; - TempA[1] = A[j*lda+1+4*m]; - TempA[2] = A[j*lda+2+4*m]; - TempA[3] = A[j*lda+3+4*m]; - - for( n = 0; n < 8; n++) - { - - TempB[0] = B[(0+4*m)*lda+0+4*n]; - TempB[1] = B[(0+4*m)*lda+1+4*n]; - TempB[2] = B[(0+4*m)*lda+2+4*n]; - TempB[3] = B[(0+4*m)*lda+3+4*n]; - - - C[0+4*n+j*lda] += TempA[0] * TempB[0]; - C[1+4*n+j*lda] += TempA[0] * TempB[1]; - C[2+4*n+j*lda] += TempA[0] * TempB[2]; - C[3+4*n+j*lda] += TempA[0] * TempB[3]; - - - - - - TempB[0] = B[(1+4*m)*lda+0+4*n]; - TempB[1] = B[(1+4*m)*lda+1+4*n]; - TempB[2] = B[(1+4*m)*lda+2+4*n]; - TempB[3] = B[(1+4*m)*lda+3+4*n]; - - - C[0+4*n+j*lda] += TempA[1] * TempB[0]; - C[1+4*n+j*lda] += TempA[1] * TempB[1]; - C[2+4*n+j*lda] += TempA[1] * TempB[2]; - C[3+4*n+j*lda] += TempA[1] * TempB[3]; - - - - TempB[0] = B[(2+4*m)*lda+0+4*n]; - TempB[1] = B[(2+4*m)*lda+1+4*n]; - TempB[2] = B[(2+4*m)*lda+2+4*n]; - TempB[3] = B[(2+4*m)*lda+3+4*n]; - - - C[0+4*n+j*lda] += TempA[2] * TempB[0]; - C[1+4*n+j*lda] += TempA[2] * TempB[1]; - C[2+4*n+j*lda] += TempA[2] * TempB[2]; - C[3+4*n+j*lda] += TempA[2] * TempB[3]; - - - - - TempB[0] = B[(3+4*m)*lda+0+4*n]; - TempB[1] = B[(3+4*m)*lda+1+4*n]; - TempB[2] = B[(3+4*m)*lda+2+4*n]; - 
TempB[3] = B[(3+4*m)*lda+3+4*n]; - - - C[0+4*n+j*lda] += TempA[3] * TempB[0]; - C[1+4*n+j*lda] += TempA[3] * TempB[1]; - C[2+4*n+j*lda] += TempA[3] * TempB[2]; - C[3+4*n+j*lda] += TempA[3] * TempB[3]; - - - } - } - } - } - if(coreid == 0) - { - for ( j = 0; j < lda; j+=2 ) - { - for ( m = 0; m < 8; m++ ) - { - TempA[0] = A[j*lda+0+4*m]; - TempA[1] = A[j*lda+1+4*m]; - TempA[2] = A[j*lda+2+4*m]; - TempA[3] = A[j*lda+3+4*m]; - - for( n = 0; n < 8; n++) - { - - - - - - - - TempB[0] = B[(1+4*m)*lda+0+4*n]; - TempB[1] = B[(1+4*m)*lda+1+4*n]; - TempB[2] = B[(1+4*m)*lda+2+4*n]; - TempB[3] = B[(1+4*m)*lda+3+4*n]; - - - C[0+4*n+j*lda] += TempA[1] * TempB[0]; - C[1+4*n+j*lda] += TempA[1] * TempB[1]; - C[2+4*n+j*lda] += TempA[1] * TempB[2]; - C[3+4*n+j*lda] += TempA[1] * TempB[3]; - - - - TempB[0] = B[(2+4*m)*lda+0+4*n]; - TempB[1] = B[(2+4*m)*lda+1+4*n]; - TempB[2] = B[(2+4*m)*lda+2+4*n]; - TempB[3] = B[(2+4*m)*lda+3+4*n]; - - - C[0+4*n+j*lda] += TempA[2] * TempB[0]; - C[1+4*n+j*lda] += TempA[2] * TempB[1]; - C[2+4*n+j*lda] += TempA[2] * TempB[2]; - C[3+4*n+j*lda] += TempA[2] * TempB[3]; - - - - - TempB[0] = B[(3+4*m)*lda+0+4*n]; - TempB[1] = B[(3+4*m)*lda+1+4*n]; - TempB[2] = B[(3+4*m)*lda+2+4*n]; - TempB[3] = B[(3+4*m)*lda+3+4*n]; - - - C[0+4*n+j*lda] += TempA[3] * TempB[0]; - C[1+4*n+j*lda] += TempA[3] * TempB[1]; - C[2+4*n+j*lda] += TempA[3] * TempB[2]; - C[3+4*n+j*lda] += TempA[3] * TempB[3]; - - TempB[0] = B[(0+4*m)*lda+0+4*n]; - TempB[1] = B[(0+4*m)*lda+1+4*n]; - TempB[2] = B[(0+4*m)*lda+2+4*n]; - TempB[3] = B[(0+4*m)*lda+3+4*n]; - - - C[0+4*n+j*lda] += TempA[0] * TempB[0]; - C[1+4*n+j*lda] += TempA[0] * TempB[1]; - C[2+4*n+j*lda] += TempA[0] * TempB[2]; - C[3+4*n+j*lda] += TempA[0] * TempB[3]; - - - } - } - } - } - */ - - - - //-------------------------------------------------------------version2.3, read 8 elements in B at one time. make k to 2. 150k mi 128k msi. 
worse than v2.0 - /* - static __thread int i, j, k, m, n; - static __thread data_t TempA[2]; - static __thread data_t TempB[8]; - - if(coreid == 0) - { - for ( j = 0; j < lda; j+=2 ) - { - for ( m = 0; m < 16; m++ ) - { - TempA[0] = A[j*lda + 0 + 2*m]; - TempA[1] = A[j*lda + 1 + 2*m]; - for( n = 0; n < 4; n++) - { - - TempB[0] = B[2*m*lda+0+8*n]; - TempB[1] = B[2*m*lda+1+8*n]; - TempB[2] = B[2*m*lda+2+8*n]; - TempB[3] = B[2*m*lda+3+8*n]; - TempB[4] = B[2*m*lda+4+8*n]; - TempB[5] = B[2*m*lda+5+8*n]; - TempB[6] = B[2*m*lda+6+8*n]; - TempB[7] = B[2*m*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[0] * TempB[0]; - C[1+8*n+j*lda] += TempA[0] * TempB[1]; - C[2+8*n+j*lda] += TempA[0] * TempB[2]; - C[3+8*n+j*lda] += TempA[0] * TempB[3]; - C[4+8*n+j*lda] += TempA[0] * TempB[4]; - C[5+8*n+j*lda] += TempA[0] * TempB[5]; - C[6+8*n+j*lda] += TempA[0] * TempB[6]; - C[7+8*n+j*lda] += TempA[0] * TempB[7]; - - TempB[0] = B[(1+2*m)*lda+0+8*n]; - TempB[1] = B[(1+2*m)*lda+1+8*n]; - TempB[2] = B[(1+2*m)*lda+2+8*n]; - TempB[3] = B[(1+2*m)*lda+3+8*n]; - TempB[4] = B[(1+2*m)*lda+4+8*n]; - TempB[5] = B[(1+2*m)*lda+5+8*n]; - TempB[6] = B[(1+2*m)*lda+6+8*n]; - TempB[7] = B[(1+2*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[1] * TempB[0]; - C[1+8*n+j*lda] += TempA[1] * TempB[1]; - C[2+8*n+j*lda] += TempA[1] * TempB[2]; - C[3+8*n+j*lda] += TempA[1] * TempB[3]; - C[4+8*n+j*lda] += TempA[1] * TempB[4]; - C[5+8*n+j*lda] += TempA[1] * TempB[5]; - C[6+8*n+j*lda] += TempA[1] * TempB[6]; - C[7+8*n+j*lda] += TempA[1] * TempB[7]; - - } - - } - } - } - - if(coreid == 1) - { - for ( j = 1; j < lda; j+=2 ) - { - for ( m = 0; m < 16; m++ ) - { - TempA[0] = A[j*lda + 0 + 2*m]; - TempA[1] = A[j*lda + 1 + 2*m]; - for( n = 0; n < 4; n++) - { - - TempB[0] = B[2*m*lda+0+8*n]; - TempB[1] = B[2*m*lda+1+8*n]; - TempB[2] = B[2*m*lda+2+8*n]; - TempB[3] = B[2*m*lda+3+8*n]; - TempB[4] = B[2*m*lda+4+8*n]; - TempB[5] = B[2*m*lda+5+8*n]; - TempB[6] = B[2*m*lda+6+8*n]; - TempB[7] = B[2*m*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[0] * 
TempB[0]; - C[1+8*n+j*lda] += TempA[0] * TempB[1]; - C[2+8*n+j*lda] += TempA[0] * TempB[2]; - C[3+8*n+j*lda] += TempA[0] * TempB[3]; - C[4+8*n+j*lda] += TempA[0] * TempB[4]; - C[5+8*n+j*lda] += TempA[0] * TempB[5]; - C[6+8*n+j*lda] += TempA[0] * TempB[6]; - C[7+8*n+j*lda] += TempA[0] * TempB[7]; - - TempB[0] = B[(1+2*m)*lda+0+8*n]; - TempB[1] = B[(1+2*m)*lda+1+8*n]; - TempB[2] = B[(1+2*m)*lda+2+8*n]; - TempB[3] = B[(1+2*m)*lda+3+8*n]; - TempB[4] = B[(1+2*m)*lda+4+8*n]; - TempB[5] = B[(1+2*m)*lda+5+8*n]; - TempB[6] = B[(1+2*m)*lda+6+8*n]; - TempB[7] = B[(1+2*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[1] * TempB[0]; - C[1+8*n+j*lda] += TempA[1] * TempB[1]; - C[2+8*n+j*lda] += TempA[1] * TempB[2]; - C[3+8*n+j*lda] += TempA[1] * TempB[3]; - C[4+8*n+j*lda] += TempA[1] * TempB[4]; - C[5+8*n+j*lda] += TempA[1] * TempB[5]; - C[6+8*n+j*lda] += TempA[1] * TempB[6]; - C[7+8*n+j*lda] += TempA[1] * TempB[7]; - - } - - } - } - } - */ - //-------------------------------------------------------------version2.4, read 4 170k and 16 140k, error because not enough space elements in B at one time. 
- /* - static __thread int i, j, k, m, n; - static __thread data_t TempA; - static __thread data_t TempB[16]; - - if(coreid == 0) - { - for ( j = 0; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - TempA = A[j*lda + k]; - for( n = 0; n < 2; n++) - { - - TempB[0] = B[k*lda+0+16*n]; - TempB[1] = B[k*lda+1+16*n]; - TempB[2] = B[k*lda+2+16*n]; - TempB[3] = B[k*lda+3+16*n]; - TempB[4] = B[k*lda+4+16*n]; - TempB[5] = B[k*lda+5+16*n]; - TempB[6] = B[k*lda+6+16*n]; - TempB[7] = B[k*lda+7+16*n]; - TempB[8] = B[k*lda+8+16*n]; - TempB[9] = B[k*lda+9+16*n]; - TempB[10] = B[k*lda+10+16*n]; - TempB[11] = B[k*lda+11+16*n]; - TempB[12] = B[k*lda+12+16*n]; - TempB[13] = B[k*lda+13+16*n]; - TempB[14] = B[k*lda+14+16*n]; - TempB[15] = B[k*lda+15+16*n]; - - - C[0+16*n+j*lda] += TempA * TempB[0]; - C[1+16*n+j*lda] += TempA * TempB[1]; - C[2+16*n+j*lda] += TempA * TempB[2]; - C[3+16*n+j*lda] += TempA * TempB[3]; - C[4+16*n+j*lda] += TempA * TempB[4]; - C[5+16*n+j*lda] += TempA * TempB[5]; - C[6+16*n+j*lda] += TempA * TempB[6]; - C[7+16*n+j*lda] += TempA * TempB[7]; - C[8+16*n+j*lda] += TempA * TempB[8]; - C[9+16*n+j*lda] += TempA * TempB[9]; - C[10+16*n+j*lda] += TempA * TempB[10]; - C[11+16*n+j*lda] += TempA * TempB[11]; - C[12+16*n+j*lda] += TempA * TempB[12]; - C[13+16*n+j*lda] += TempA * TempB[13]; - C[14+16*n+j*lda] += TempA * TempB[14]; - C[15+16*n+j*lda] += TempA * TempB[15]; - - - - } - - } - } - } - if(coreid == 1) - { - for ( j = 1; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - TempA = A[j*lda + k]; - for( n = 0; n < 2; n++) - { - - TempB[0] = B[k*lda+0+16*n]; - TempB[1] = B[k*lda+1+16*n]; - TempB[2] = B[k*lda+2+16*n]; - TempB[3] = B[k*lda+3+16*n]; - TempB[4] = B[k*lda+4+16*n]; - TempB[5] = B[k*lda+5+16*n]; - TempB[6] = B[k*lda+6+16*n]; - TempB[7] = B[k*lda+7+16*n]; - TempB[8] = B[k*lda+8+16*n]; - TempB[9] = B[k*lda+9+16*n]; - TempB[10] = B[k*lda+10+16*n]; - TempB[11] = B[k*lda+11+16*n]; - TempB[12] = B[k*lda+12+16*n]; - TempB[13] = B[k*lda+13+16*n]; - 
TempB[14] = B[k*lda+14+16*n]; - TempB[15] = B[k*lda+15+16*n]; - - - C[0+16*n+j*lda] += TempA * TempB[0]; - C[1+16*n+j*lda] += TempA * TempB[1]; - C[2+16*n+j*lda] += TempA * TempB[2]; - C[3+16*n+j*lda] += TempA * TempB[3]; - C[4+16*n+j*lda] += TempA * TempB[4]; - C[5+16*n+j*lda] += TempA * TempB[5]; - C[6+16*n+j*lda] += TempA * TempB[6]; - C[7+16*n+j*lda] += TempA * TempB[7]; - C[8+16*n+j*lda] += TempA * TempB[8]; - C[9+16*n+j*lda] += TempA * TempB[9]; - C[10+16*n+j*lda] += TempA * TempB[10]; - C[11+16*n+j*lda] += TempA * TempB[11]; - C[12+16*n+j*lda] += TempA * TempB[12]; - C[13+16*n+j*lda] += TempA * TempB[13]; - C[14+16*n+j*lda] += TempA * TempB[14]; - C[15+16*n+j*lda] += TempA * TempB[15]; - - - - } - - } - } - } - - */ - //-------------------------------------------------------------version2.5, read 10 elements in B at one time. has corner cases. Turns out it hangs. - /* - static __thread int j, k, n; - static __thread data_t TempA; - static __thread data_t TempB[10]; - - if(coreid == 0) - { - for ( j = 0; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - TempA = A[j*lda + k]; - for( n = 0; n < 3; n++) - { - TempB[0] = B[k*lda+0+10*n]; - TempB[1] = B[k*lda+1+10*n]; - TempB[2] = B[k*lda+2+10*n]; - TempB[3] = B[k*lda+3+10*n]; - TempB[4] = B[k*lda+4+10*n]; - TempB[5] = B[k*lda+5+10*n]; - TempB[6] = B[k*lda+6+10*n]; - TempB[7] = B[k*lda+7+10*n]; - TempB[8] = B[k*lda+8+10*n]; - TempB[9] = B[k*lda+9+10*n]; - - C[0+10*n+j*lda] += TempA * TempB[0]; - C[1+10*n+j*lda] += TempA * TempB[1]; - C[2+10*n+j*lda] += TempA * TempB[2]; - C[3+10*n+j*lda] += TempA * TempB[3]; - C[4+10*n+j*lda] += TempA * TempB[4]; - C[5+10*n+j*lda] += TempA * TempB[5]; - C[6+10*n+j*lda] += TempA * TempB[6]; - C[7+10*n+j*lda] += TempA * TempB[7]; - C[8+10*n+j*lda] += TempA * TempB[8]; - C[9+10*n+j*lda] += TempA * TempB[9]; - } - TempB[0] = B[k*lda+30]; - TempB[1] = B[k*lda+31]; - C[30+j*lda] += TempA * TempB[0]; - C[31+j*lda] += TempA * TempB[1]; - } - } - } - if(coreid == 1) - { - for ( j = 
1; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - TempA = A[j*lda + k]; - for( n = 0; n < 3; n++) - { - TempB[0] = B[k*lda+0+10*n]; - TempB[1] = B[k*lda+1+10*n]; - TempB[2] = B[k*lda+2+10*n]; - TempB[3] = B[k*lda+3+10*n]; - TempB[4] = B[k*lda+4+10*n]; - TempB[5] = B[k*lda+5+10*n]; - TempB[6] = B[k*lda+6+10*n]; - TempB[7] = B[k*lda+7+10*n]; - TempB[8] = B[k*lda+8+10*n]; - TempB[9] = B[k*lda+9+10*n]; - - C[0+10*n+j*lda] += TempA * TempB[0]; - C[1+10*n+j*lda] += TempA * TempB[1]; - C[2+10*n+j*lda] += TempA * TempB[2]; - C[3+10*n+j*lda] += TempA * TempB[3]; - C[4+10*n+j*lda] += TempA * TempB[4]; - C[5+10*n+j*lda] += TempA * TempB[5]; - C[6+10*n+j*lda] += TempA * TempB[6]; - C[7+10*n+j*lda] += TempA * TempB[7]; - C[8+10*n+j*lda] += TempA * TempB[8]; - C[9+10*n+j*lda] += TempA * TempB[9]; - } - TempB[0] = B[k*lda+30]; - TempB[1] = B[k*lda+31]; - C[30+j*lda] += TempA * TempB[0]; - C[31+j*lda] += TempA * TempB[1]; - } - } - } - - */ - - //-------------------------------------------------------------version2.6, optimize 2.0. 
take off n loop and tried different order of reading B - /* - static __thread int j, k, n; - static __thread data_t TempA; - static __thread data_t TempB[8]; - - if(coreid == 0) - { - for ( j = 0; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - TempA = A[j*lda + k]; - - TempB[0] = B[k*lda+0]; - TempB[1] = B[k*lda+1]; - TempB[2] = B[k*lda+2]; - TempB[3] = B[k*lda+3]; - TempB[4] = B[k*lda+4]; - TempB[5] = B[k*lda+5]; - TempB[6] = B[k*lda+6]; - TempB[7] = B[k*lda+7]; - - C[0+j*lda] += TempA * TempB[0]; - C[1+j*lda] += TempA * TempB[1]; - C[2+j*lda] += TempA * TempB[2]; - C[3+j*lda] += TempA * TempB[3]; - C[4+j*lda] += TempA * TempB[4]; - C[5+j*lda] += TempA * TempB[5]; - C[6+j*lda] += TempA * TempB[6]; - C[7+j*lda] += TempA * TempB[7]; - - TempB[0] = B[k*lda+8]; - TempB[1] = B[k*lda+9]; - TempB[2] = B[k*lda+10]; - TempB[3] = B[k*lda+11]; - TempB[4] = B[k*lda+12]; - TempB[5] = B[k*lda+13]; - TempB[6] = B[k*lda+14]; - TempB[7] = B[k*lda+15]; - - C[8+j*lda] += TempA * TempB[0]; - C[9+j*lda] += TempA * TempB[1]; - C[10+j*lda] += TempA * TempB[2]; - C[11+j*lda] += TempA * TempB[3]; - C[12+j*lda] += TempA * TempB[4]; - C[13+j*lda] += TempA * TempB[5]; - C[14+j*lda] += TempA * TempB[6]; - C[15+j*lda] += TempA * TempB[7]; - - TempB[0] = B[k*lda+16]; - TempB[1] = B[k*lda+17]; - TempB[2] = B[k*lda+18]; - TempB[3] = B[k*lda+19]; - TempB[4] = B[k*lda+20]; - TempB[5] = B[k*lda+21]; - TempB[6] = B[k*lda+22]; - TempB[7] = B[k*lda+23]; - - C[16+j*lda] += TempA * TempB[0]; - C[17+j*lda] += TempA * TempB[1]; - C[18+j*lda] += TempA * TempB[2]; - C[19+j*lda] += TempA * TempB[3]; - C[20+j*lda] += TempA * TempB[4]; - C[21+j*lda] += TempA * TempB[5]; - C[22+j*lda] += TempA * TempB[6]; - C[23+j*lda] += TempA * TempB[7]; - - TempB[0] = B[k*lda+24]; - TempB[1] = B[k*lda+25]; - TempB[2] = B[k*lda+26]; - TempB[3] = B[k*lda+27]; - TempB[4] = B[k*lda+28]; - TempB[5] = B[k*lda+29]; - TempB[6] = B[k*lda+30]; - TempB[7] = B[k*lda+31]; - - C[24+j*lda] += TempA * TempB[0]; - C[25+j*lda] += 
TempA * TempB[1]; - C[26+j*lda] += TempA * TempB[2]; - C[27+j*lda] += TempA * TempB[3]; - C[28+j*lda] += TempA * TempB[4]; - C[29+j*lda] += TempA * TempB[5]; - C[30+j*lda] += TempA * TempB[6]; - C[31+j*lda] += TempA * TempB[7]; - - - - } - } - } - - if(coreid == 1) - { - for ( j = 1; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - TempA = A[j*lda + k]; - - - TempB[0] = B[k*lda+24]; - TempB[1] = B[k*lda+25]; - TempB[2] = B[k*lda+26]; - TempB[3] = B[k*lda+27]; - TempB[4] = B[k*lda+28]; - TempB[5] = B[k*lda+29]; - TempB[6] = B[k*lda+30]; - TempB[7] = B[k*lda+31]; - - C[24+j*lda] += TempA * TempB[0]; - C[25+j*lda] += TempA * TempB[1]; - C[26+j*lda] += TempA * TempB[2]; - C[27+j*lda] += TempA * TempB[3]; - C[28+j*lda] += TempA * TempB[4]; - C[29+j*lda] += TempA * TempB[5]; - C[30+j*lda] += TempA * TempB[6]; - C[31+j*lda] += TempA * TempB[7]; - - TempB[0] = B[k*lda+0]; - TempB[1] = B[k*lda+1]; - TempB[2] = B[k*lda+2]; - TempB[3] = B[k*lda+3]; - TempB[4] = B[k*lda+4]; - TempB[5] = B[k*lda+5]; - TempB[6] = B[k*lda+6]; - TempB[7] = B[k*lda+7]; - - C[0+j*lda] += TempA * TempB[0]; - C[1+j*lda] += TempA * TempB[1]; - C[2+j*lda] += TempA * TempB[2]; - C[3+j*lda] += TempA * TempB[3]; - C[4+j*lda] += TempA * TempB[4]; - C[5+j*lda] += TempA * TempB[5]; - C[6+j*lda] += TempA * TempB[6]; - C[7+j*lda] += TempA * TempB[7]; - - TempB[0] = B[k*lda+8]; - TempB[1] = B[k*lda+9]; - TempB[2] = B[k*lda+10]; - TempB[3] = B[k*lda+11]; - TempB[4] = B[k*lda+12]; - TempB[5] = B[k*lda+13]; - TempB[6] = B[k*lda+14]; - TempB[7] = B[k*lda+15]; - - C[8+j*lda] += TempA * TempB[0]; - C[9+j*lda] += TempA * TempB[1]; - C[10+j*lda] += TempA * TempB[2]; - C[11+j*lda] += TempA * TempB[3]; - C[12+j*lda] += TempA * TempB[4]; - C[13+j*lda] += TempA * TempB[5]; - C[14+j*lda] += TempA * TempB[6]; - C[15+j*lda] += TempA * TempB[7]; - - TempB[0] = B[k*lda+16]; - TempB[1] = B[k*lda+17]; - TempB[2] = B[k*lda+18]; - TempB[3] = B[k*lda+19]; - TempB[4] = B[k*lda+20]; - TempB[5] = B[k*lda+21]; - TempB[6] = 
B[k*lda+22]; - TempB[7] = B[k*lda+23]; - - C[16+j*lda] += TempA * TempB[0]; - C[17+j*lda] += TempA * TempB[1]; - C[18+j*lda] += TempA * TempB[2]; - C[19+j*lda] += TempA * TempB[3]; - C[20+j*lda] += TempA * TempB[4]; - C[21+j*lda] += TempA * TempB[5]; - C[22+j*lda] += TempA * TempB[6]; - C[23+j*lda] += TempA * TempB[7]; - - - - - - - } - } - } - */ - //-------------------------------------------------------------version2.7, use m=l*da, i=k*lda,out of stack, only i, MI 150k, only m, MSI 117.9k slower than v2.0 - /* - static __thread int i, j, k, m, n; - static __thread data_t TempA; - static __thread data_t TempB[8]; - - if(coreid == 0) - { - for ( j = 0; j < lda; j+=2 ) - { - m = j * lda; - for ( k = 0; k < lda; k++ ) - { - TempA = A[m+ k]; - for( n = 0; n < 4; n++) - { - - TempB[0] = B[k *lda+0+8*n]; - TempB[1] = B[k *lda+1+8*n]; - TempB[2] = B[k *lda+2+8*n]; - TempB[3] = B[k *lda+3+8*n]; - TempB[4] = B[k *lda+4+8*n]; - TempB[5] = B[k *lda+5+8*n]; - TempB[6] = B[k *lda+6+8*n]; - TempB[7] = B[k *lda+7+8*n]; - - C[0+8*n+m] += TempA * TempB[0]; - C[1+8*n+m] += TempA * TempB[1]; - C[2+8*n+m] += TempA * TempB[2]; - C[3+8*n+m] += TempA * TempB[3]; - C[4+8*n+m] += TempA * TempB[4]; - C[5+8*n+m] += TempA * TempB[5]; - C[6+8*n+m] += TempA * TempB[6]; - C[7+8*n+m] += TempA * TempB[7]; - - } - - } - } - } -if(coreid == 1) - { - for ( j = 1; j < lda; j+=2 ) - { - m = j * lda; - for ( k = 0; k < lda; k++ ) - { - TempA = A[m+ k]; - for( n = 0; n < 4; n++) - { - - TempB[0] = B[k *lda+0+8*n]; - TempB[1] = B[k *lda+1+8*n]; - TempB[2] = B[k *lda+2+8*n]; - TempB[3] = B[k *lda+3+8*n]; - TempB[4] = B[k *lda+4+8*n]; - TempB[5] = B[k *lda+5+8*n]; - TempB[6] = B[k *lda+6+8*n]; - TempB[7] = B[k *lda+7+8*n]; - - C[0+8*n+m] += TempA * TempB[0]; - C[1+8*n+m] += TempA * TempB[1]; - C[2+8*n+m] += TempA * TempB[2]; - C[3+8*n+m] += TempA * TempB[3]; - C[4+8*n+m] += TempA * TempB[4]; - C[5+8*n+m] += TempA * TempB[5]; - C[6+8*n+m] += TempA * TempB[6]; - C[7+8*n+m] += TempA * TempB[7]; - - } - - } - 
} - } - */ -//-------------------------------------------------------------version2.8 deal with false sharing, MSI,118K vs v2.0 117.0K. MI 147.629K. -/* -static __thread int i, j, k, m, n; - static __thread data_t TempA; - static __thread data_t TempB[8]; - - if(coreid == 0) - { - for ( j = 0; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - TempA = A[j*lda + k]; - for( n = 0; n < 2; n++) - { - - TempB[0] = B[k*lda+0+16*n]; - TempB[1] = B[k*lda+1+16*n]; - TempB[2] = B[k*lda+2+16*n]; - TempB[3] = B[k*lda+3+16*n]; - TempB[4] = B[k*lda+4+16*n]; - TempB[5] = B[k*lda+5+16*n]; - TempB[6] = B[k*lda+6+16*n]; - TempB[7] = B[k*lda+7+16*n]; - - - - C[0+16*n+j*lda] += TempA * TempB[0]; - C[1+16*n+j*lda] += TempA * TempB[1]; - C[2+16*n+j*lda] += TempA * TempB[2]; - C[3+16*n+j*lda] += TempA * TempB[3]; - C[4+16*n+j*lda] += TempA * TempB[4]; - C[5+16*n+j*lda] += TempA * TempB[5]; - C[6+16*n+j*lda] += TempA * TempB[6]; - C[7+16*n+j*lda] += TempA * TempB[7]; - - TempB[0] = B[k*lda+8+16*n]; - TempB[1] = B[k*lda+9+16*n]; - TempB[2] = B[k*lda+10+16*n]; - TempB[3] = B[k*lda+11+16*n]; - TempB[4] = B[k*lda+12+16*n]; - TempB[5] = B[k*lda+13+16*n]; - TempB[6] = B[k*lda+14+16*n]; - TempB[7] = B[k*lda+15+16*n]; - - C[8+16*n+j*lda] += TempA * TempB[0]; - C[9+16*n+j*lda] += TempA * TempB[1]; - C[10+16*n+j*lda] += TempA * TempB[2]; - C[11+16*n+j*lda] += TempA * TempB[3]; - C[12+16*n+j*lda] += TempA * TempB[4]; - C[13+16*n+j*lda] += TempA * TempB[5]; - C[14+16*n+j*lda] += TempA * TempB[6]; - C[15+16*n+j*lda] += TempA * TempB[7]; - - - - } - - } - } - } - if(coreid == 1) - { - for ( j = 1; j < lda; j+=2 ) - { - for ( k = 0; k < lda; k++ ) - { - TempA = A[j*lda + k]; - for( n = 0; n < 2; n++) - { - - - - TempB[0] = B[k*lda+8+16*n]; - TempB[1] = B[k*lda+9+16*n]; - TempB[2] = B[k*lda+10+16*n]; - TempB[3] = B[k*lda+11+16*n]; - TempB[4] = B[k*lda+12+16*n]; - TempB[5] = B[k*lda+13+16*n]; - TempB[6] = B[k*lda+14+16*n]; - TempB[7] = B[k*lda+15+16*n]; - - C[8+16*n+j*lda] += TempA * TempB[0]; - 
C[9+16*n+j*lda] += TempA * TempB[1]; - C[10+16*n+j*lda] += TempA * TempB[2]; - C[11+16*n+j*lda] += TempA * TempB[3]; - C[12+16*n+j*lda] += TempA * TempB[4]; - C[13+16*n+j*lda] += TempA * TempB[5]; - C[14+16*n+j*lda] += TempA * TempB[6]; - C[15+16*n+j*lda] += TempA * TempB[7]; - - TempB[0] = B[k*lda+0+16*n]; - TempB[1] = B[k*lda+1+16*n]; - TempB[2] = B[k*lda+2+16*n]; - TempB[3] = B[k*lda+3+16*n]; - TempB[4] = B[k*lda+4+16*n]; - TempB[5] = B[k*lda+5+16*n]; - TempB[6] = B[k*lda+6+16*n]; - TempB[7] = B[k*lda+7+16*n]; - - - - C[0+16*n+j*lda] += TempA * TempB[0]; - C[1+16*n+j*lda] += TempA * TempB[1]; - C[2+16*n+j*lda] += TempA * TempB[2]; - C[3+16*n+j*lda] += TempA * TempB[3]; - C[4+16*n+j*lda] += TempA * TempB[4]; - C[5+16*n+j*lda] += TempA * TempB[5]; - C[6+16*n+j*lda] += TempA * TempB[6]; - C[7+16*n+j*lda] += TempA * TempB[7]; - - - } - - } - } - } - */ - - //----------------------------------------------------------------version 2.11 optmize j,use core 1 j from 0 to 15 MSI 98k i = j*lda - //----------------------------------------------------------------version 2.12 not use i = j *lda MSI 95k - static __thread data_t TempA[8]; - static __thread data_t TempB[8]; - static __thread int j,m,n,i,k; - - if(coreid == 1) - { - for ( j = 16; j < 32; j++ ) - { - - for ( m = 0; m < 4; m++ ) - { - - TempA[0] = A[j*lda+0+8*m]; - TempA[1] = A[j*lda+1+8*m]; - TempA[2] = A[j*lda+2+8*m]; - TempA[3] = A[j*lda+3+8*m]; - TempA[4] = A[j*lda+4+8*m]; - TempA[5] = A[j*lda+5+8*m]; - TempA[6] = A[j*lda+6+8*m]; - TempA[7] = A[j*lda+7+8*m]; - - for( n = 0; n < 4; n++) - { - /* - i = j*lda; - - TempB[0] = B[(0+8*m)*lda+0+8*n]; - TempB[1] = B[(0+8*m)*lda+1+8*n]; - TempB[2] = B[(0+8*m)*lda+2+8*n]; - TempB[3] = B[(0+8*m)*lda+3+8*n]; - TempB[4] = B[(0+8*m)*lda+4+8*n]; - TempB[5] = B[(0+8*m)*lda+5+8*n]; - TempB[6] = B[(0+8*m)*lda+6+8*n]; - TempB[7] = B[(0+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[0] * TempB[0]; - C[1+8*n+i] += TempA[0] * TempB[1]; - C[2+8*n+i] += TempA[0] * TempB[2]; - C[3+8*n+i] += 
TempA[0] * TempB[3]; - C[4+8*n+i] += TempA[0] * TempB[4]; - C[5+8*n+i] += TempA[0] * TempB[5]; - C[6+8*n+i] += TempA[0] * TempB[6]; - C[7+8*n+i] += TempA[0] * TempB[7]; - - - - TempB[0] = B[(1+8*m)*lda+0+8*n]; - TempB[1] = B[(1+8*m)*lda+1+8*n]; - TempB[2] = B[(1+8*m)*lda+2+8*n]; - TempB[3] = B[(1+8*m)*lda+3+8*n]; - TempB[4] = B[(1+8*m)*lda+4+8*n]; - TempB[5] = B[(1+8*m)*lda+5+8*n]; - TempB[6] = B[(1+8*m)*lda+6+8*n]; - TempB[7] = B[(1+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[1] * TempB[0]; - C[1+8*n+i] += TempA[1] * TempB[1]; - C[2+8*n+i] += TempA[1] * TempB[2]; - C[3+8*n+i] += TempA[1] * TempB[3]; - C[4+8*n+i] += TempA[1] * TempB[4]; - C[5+8*n+i] += TempA[1] * TempB[5]; - C[6+8*n+i] += TempA[1] * TempB[6]; - C[7+8*n+i] += TempA[1] * TempB[7]; - - - - TempB[0] = B[(2+8*m)*lda+0+8*n]; - TempB[1] = B[(2+8*m)*lda+1+8*n]; - TempB[2] = B[(2+8*m)*lda+2+8*n]; - TempB[3] = B[(2+8*m)*lda+3+8*n]; - TempB[4] = B[(2+8*m)*lda+4+8*n]; - TempB[5] = B[(2+8*m)*lda+5+8*n]; - TempB[6] = B[(2+8*m)*lda+6+8*n]; - TempB[7] = B[(2+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[2] * TempB[0]; - C[1+8*n+i] += TempA[2] * TempB[1]; - C[2+8*n+i] += TempA[2] * TempB[2]; - C[3+8*n+i] += TempA[2] * TempB[3]; - C[4+8*n+i] += TempA[2] * TempB[4]; - C[5+8*n+i] += TempA[2] * TempB[5]; - C[6+8*n+i] += TempA[2] * TempB[6]; - C[7+8*n+i] += TempA[2] * TempB[7]; - - - - TempB[0] = B[(3+8*m)*lda+0+8*n]; - TempB[1] = B[(3+8*m)*lda+1+8*n]; - TempB[2] = B[(3+8*m)*lda+2+8*n]; - TempB[3] = B[(3+8*m)*lda+3+8*n]; - TempB[4] = B[(3+8*m)*lda+4+8*n]; - TempB[5] = B[(3+8*m)*lda+5+8*n]; - TempB[6] = B[(3+8*m)*lda+6+8*n]; - TempB[7] = B[(3+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[3] * TempB[0]; - C[1+8*n+i] += TempA[3] * TempB[1]; - C[2+8*n+i] += TempA[3] * TempB[2]; - C[3+8*n+i] += TempA[3] * TempB[3]; - C[4+8*n+i] += TempA[3] * TempB[4]; - C[5+8*n+i] += TempA[3] * TempB[5]; - C[6+8*n+i] += TempA[3] * TempB[6]; - C[7+8*n+i] += TempA[3] * TempB[7]; - - - TempB[0] = B[(4+8*m)*lda+0+8*n]; - TempB[1] = B[(4+8*m)*lda+1+8*n]; - 
TempB[2] = B[(4+8*m)*lda+2+8*n]; - TempB[3] = B[(4+8*m)*lda+3+8*n]; - TempB[4] = B[(4+8*m)*lda+4+8*n]; - TempB[5] = B[(4+8*m)*lda+5+8*n]; - TempB[6] = B[(4+8*m)*lda+6+8*n]; - TempB[7] = B[(4+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[4] * TempB[0]; - C[1+8*n+i] += TempA[4] * TempB[1]; - C[2+8*n+i] += TempA[4] * TempB[2]; - C[3+8*n+i] += TempA[4] * TempB[3]; - C[4+8*n+i] += TempA[4] * TempB[4]; - C[5+8*n+i] += TempA[4] * TempB[5]; - C[6+8*n+i] += TempA[4] * TempB[6]; - C[7+8*n+i] += TempA[4] * TempB[7]; - - - - TempB[0] = B[(5+8*m)*lda+0+8*n]; - TempB[1] = B[(5+8*m)*lda+1+8*n]; - TempB[2] = B[(5+8*m)*lda+2+8*n]; - TempB[3] = B[(5+8*m)*lda+3+8*n]; - TempB[4] = B[(5+8*m)*lda+4+8*n]; - TempB[5] = B[(5+8*m)*lda+5+8*n]; - TempB[6] = B[(5+8*m)*lda+6+8*n]; - TempB[7] = B[(5+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[5] * TempB[0]; - C[1+8*n+i] += TempA[5] * TempB[1]; - C[2+8*n+i] += TempA[5] * TempB[2]; - C[3+8*n+i] += TempA[5] * TempB[3]; - C[4+8*n+i] += TempA[5] * TempB[4]; - C[5+8*n+i] += TempA[5] * TempB[5]; - C[6+8*n+i] += TempA[5] * TempB[6]; - C[7+8*n+i] += TempA[5] * TempB[7]; - - - - TempB[0] = B[(6+8*m)*lda+0+8*n]; - TempB[1] = B[(6+8*m)*lda+1+8*n]; - TempB[2] = B[(6+8*m)*lda+2+8*n]; - TempB[3] = B[(6+8*m)*lda+3+8*n]; - TempB[4] = B[(6+8*m)*lda+4+8*n]; - TempB[5] = B[(6+8*m)*lda+5+8*n]; - TempB[6] = B[(6+8*m)*lda+6+8*n]; - TempB[7] = B[(6+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[6] * TempB[0]; - C[1+8*n+i] += TempA[6] * TempB[1]; - C[2+8*n+i] += TempA[6] * TempB[2]; - C[3+8*n+i] += TempA[6] * TempB[3]; - C[4+8*n+i] += TempA[6] * TempB[4]; - C[5+8*n+i] += TempA[6] * TempB[5]; - C[6+8*n+i] += TempA[6] * TempB[6]; - C[7+8*n+i] += TempA[6] * TempB[7]; - - - TempB[0] = B[(7+8*m)*lda+0+8*n]; - TempB[1] = B[(7+8*m)*lda+1+8*n]; - TempB[2] = B[(7+8*m)*lda+2+8*n]; - TempB[3] = B[(7+8*m)*lda+3+8*n]; - TempB[4] = B[(7+8*m)*lda+4+8*n]; - TempB[5] = B[(7+8*m)*lda+5+8*n]; - TempB[6] = B[(7+8*m)*lda+6+8*n]; - TempB[7] = B[(7+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[7] * TempB[0]; - 
C[1+8*n+i] += TempA[7] * TempB[1]; - C[2+8*n+i] += TempA[7] * TempB[2]; - C[3+8*n+i] += TempA[7] * TempB[3]; - C[4+8*n+i] += TempA[7] * TempB[4]; - C[5+8*n+i] += TempA[7] * TempB[5]; - C[6+8*n+i] += TempA[7] * TempB[6]; - C[7+8*n+i] += TempA[7] * TempB[7]; - - */ - TempB[0] = B[(0+8*m)*lda+0+8*n]; - TempB[1] = B[(0+8*m)*lda+1+8*n]; - TempB[2] = B[(0+8*m)*lda+2+8*n]; - TempB[3] = B[(0+8*m)*lda+3+8*n]; - TempB[4] = B[(0+8*m)*lda+4+8*n]; - TempB[5] = B[(0+8*m)*lda+5+8*n]; - TempB[6] = B[(0+8*m)*lda+6+8*n]; - TempB[7] = B[(0+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[0] * TempB[0]; - C[1+8*n+j*lda] += TempA[0] * TempB[1]; - C[2+8*n+j*lda] += TempA[0] * TempB[2]; - C[3+8*n+j*lda] += TempA[0] * TempB[3]; - C[4+8*n+j*lda] += TempA[0] * TempB[4]; - C[5+8*n+j*lda] += TempA[0] * TempB[5]; - C[6+8*n+j*lda] += TempA[0] * TempB[6]; - C[7+8*n+j*lda] += TempA[0] * TempB[7]; - - - - TempB[0] = B[(1+8*m)*lda+0+8*n]; - TempB[1] = B[(1+8*m)*lda+1+8*n]; - TempB[2] = B[(1+8*m)*lda+2+8*n]; - TempB[3] = B[(1+8*m)*lda+3+8*n]; - TempB[4] = B[(1+8*m)*lda+4+8*n]; - TempB[5] = B[(1+8*m)*lda+5+8*n]; - TempB[6] = B[(1+8*m)*lda+6+8*n]; - TempB[7] = B[(1+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[1] * TempB[0]; - C[1+8*n+j*lda] += TempA[1] * TempB[1]; - C[2+8*n+j*lda] += TempA[1] * TempB[2]; - C[3+8*n+j*lda] += TempA[1] * TempB[3]; - C[4+8*n+j*lda] += TempA[1] * TempB[4]; - C[5+8*n+j*lda] += TempA[1] * TempB[5]; - C[6+8*n+j*lda] += TempA[1] * TempB[6]; - C[7+8*n+j*lda] += TempA[1] * TempB[7]; - - - - TempB[0] = B[(2+8*m)*lda+0+8*n]; - TempB[1] = B[(2+8*m)*lda+1+8*n]; - TempB[2] = B[(2+8*m)*lda+2+8*n]; - TempB[3] = B[(2+8*m)*lda+3+8*n]; - TempB[4] = B[(2+8*m)*lda+4+8*n]; - TempB[5] = B[(2+8*m)*lda+5+8*n]; - TempB[6] = B[(2+8*m)*lda+6+8*n]; - TempB[7] = B[(2+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[2] * TempB[0]; - C[1+8*n+j*lda] += TempA[2] * TempB[1]; - C[2+8*n+j*lda] += TempA[2] * TempB[2]; - C[3+8*n+j*lda] += TempA[2] * TempB[3]; - C[4+8*n+j*lda] += TempA[2] * TempB[4]; - C[5+8*n+j*lda] 
+= TempA[2] * TempB[5]; - C[6+8*n+j*lda] += TempA[2] * TempB[6]; - C[7+8*n+j*lda] += TempA[2] * TempB[7]; - - - - TempB[0] = B[(3+8*m)*lda+0+8*n]; - TempB[1] = B[(3+8*m)*lda+1+8*n]; - TempB[2] = B[(3+8*m)*lda+2+8*n]; - TempB[3] = B[(3+8*m)*lda+3+8*n]; - TempB[4] = B[(3+8*m)*lda+4+8*n]; - TempB[5] = B[(3+8*m)*lda+5+8*n]; - TempB[6] = B[(3+8*m)*lda+6+8*n]; - TempB[7] = B[(3+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[3] * TempB[0]; - C[1+8*n+j*lda] += TempA[3] * TempB[1]; - C[2+8*n+j*lda] += TempA[3] * TempB[2]; - C[3+8*n+j*lda] += TempA[3] * TempB[3]; - C[4+8*n+j*lda] += TempA[3] * TempB[4]; - C[5+8*n+j*lda] += TempA[3] * TempB[5]; - C[6+8*n+j*lda] += TempA[3] * TempB[6]; - C[7+8*n+j*lda] += TempA[3] * TempB[7]; - - - TempB[0] = B[(4+8*m)*lda+0+8*n]; - TempB[1] = B[(4+8*m)*lda+1+8*n]; - TempB[2] = B[(4+8*m)*lda+2+8*n]; - TempB[3] = B[(4+8*m)*lda+3+8*n]; - TempB[4] = B[(4+8*m)*lda+4+8*n]; - TempB[5] = B[(4+8*m)*lda+5+8*n]; - TempB[6] = B[(4+8*m)*lda+6+8*n]; - TempB[7] = B[(4+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[4] * TempB[0]; - C[1+8*n+j*lda] += TempA[4] * TempB[1]; - C[2+8*n+j*lda] += TempA[4] * TempB[2]; - C[3+8*n+j*lda] += TempA[4] * TempB[3]; - C[4+8*n+j*lda] += TempA[4] * TempB[4]; - C[5+8*n+j*lda] += TempA[4] * TempB[5]; - C[6+8*n+j*lda] += TempA[4] * TempB[6]; - C[7+8*n+j*lda] += TempA[4] * TempB[7]; - - - - TempB[0] = B[(5+8*m)*lda+0+8*n]; - TempB[1] = B[(5+8*m)*lda+1+8*n]; - TempB[2] = B[(5+8*m)*lda+2+8*n]; - TempB[3] = B[(5+8*m)*lda+3+8*n]; - TempB[4] = B[(5+8*m)*lda+4+8*n]; - TempB[5] = B[(5+8*m)*lda+5+8*n]; - TempB[6] = B[(5+8*m)*lda+6+8*n]; - TempB[7] = B[(5+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[5] * TempB[0]; - C[1+8*n+j*lda] += TempA[5] * TempB[1]; - C[2+8*n+j*lda] += TempA[5] * TempB[2]; - C[3+8*n+j*lda] += TempA[5] * TempB[3]; - C[4+8*n+j*lda] += TempA[5] * TempB[4]; - C[5+8*n+j*lda] += TempA[5] * TempB[5]; - C[6+8*n+j*lda] += TempA[5] * TempB[6]; - C[7+8*n+j*lda] += TempA[5] * TempB[7]; - - - - TempB[0] = B[(6+8*m)*lda+0+8*n]; - 
TempB[1] = B[(6+8*m)*lda+1+8*n]; - TempB[2] = B[(6+8*m)*lda+2+8*n]; - TempB[3] = B[(6+8*m)*lda+3+8*n]; - TempB[4] = B[(6+8*m)*lda+4+8*n]; - TempB[5] = B[(6+8*m)*lda+5+8*n]; - TempB[6] = B[(6+8*m)*lda+6+8*n]; - TempB[7] = B[(6+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[6] * TempB[0]; - C[1+8*n+j*lda] += TempA[6] * TempB[1]; - C[2+8*n+j*lda] += TempA[6] * TempB[2]; - C[3+8*n+j*lda] += TempA[6] * TempB[3]; - C[4+8*n+j*lda] += TempA[6] * TempB[4]; - C[5+8*n+j*lda] += TempA[6] * TempB[5]; - C[6+8*n+j*lda] += TempA[6] * TempB[6]; - C[7+8*n+j*lda] += TempA[6] * TempB[7]; - - - TempB[0] = B[(7+8*m)*lda+0+8*n]; - TempB[1] = B[(7+8*m)*lda+1+8*n]; - TempB[2] = B[(7+8*m)*lda+2+8*n]; - TempB[3] = B[(7+8*m)*lda+3+8*n]; - TempB[4] = B[(7+8*m)*lda+4+8*n]; - TempB[5] = B[(7+8*m)*lda+5+8*n]; - TempB[6] = B[(7+8*m)*lda+6+8*n]; - TempB[7] = B[(7+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[7] * TempB[0]; - C[1+8*n+j*lda] += TempA[7] * TempB[1]; - C[2+8*n+j*lda] += TempA[7] * TempB[2]; - C[3+8*n+j*lda] += TempA[7] * TempB[3]; - C[4+8*n+j*lda] += TempA[7] * TempB[4]; - C[5+8*n+j*lda] += TempA[7] * TempB[5]; - C[6+8*n+j*lda] += TempA[7] * TempB[6]; - C[7+8*n+j*lda] += TempA[7] * TempB[7]; - } - - } - } - } - if(coreid ==0) - { - for ( j = 0; j < 16; j++ ) - { - - for ( m = 0; m < 4; m++ ) - { - - TempA[0] = A[j*lda+0+8*m]; - TempA[1] = A[j*lda+1+8*m]; - TempA[2] = A[j*lda+2+8*m]; - TempA[3] = A[j*lda+3+8*m]; - TempA[4] = A[j*lda+4+8*m]; - TempA[5] = A[j*lda+5+8*m]; - TempA[6] = A[j*lda+6+8*m]; - TempA[7] = A[j*lda+7+8*m]; - - for( n = 0; n < 4; n++) - { - /* - i = j*lda; - - TempB[0] = B[(0+8*m)*lda+0+8*n]; - TempB[1] = B[(0+8*m)*lda+1+8*n]; - TempB[2] = B[(0+8*m)*lda+2+8*n]; - TempB[3] = B[(0+8*m)*lda+3+8*n]; - TempB[4] = B[(0+8*m)*lda+4+8*n]; - TempB[5] = B[(0+8*m)*lda+5+8*n]; - TempB[6] = B[(0+8*m)*lda+6+8*n]; - TempB[7] = B[(0+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[0] * TempB[0]; - C[1+8*n+i] += TempA[0] * TempB[1]; - C[2+8*n+i] += TempA[0] * TempB[2]; - C[3+8*n+i] += TempA[0] * 
TempB[3]; - C[4+8*n+i] += TempA[0] * TempB[4]; - C[5+8*n+i] += TempA[0] * TempB[5]; - C[6+8*n+i] += TempA[0] * TempB[6]; - C[7+8*n+i] += TempA[0] * TempB[7]; - - - - TempB[0] = B[(1+8*m)*lda+0+8*n]; - TempB[1] = B[(1+8*m)*lda+1+8*n]; - TempB[2] = B[(1+8*m)*lda+2+8*n]; - TempB[3] = B[(1+8*m)*lda+3+8*n]; - TempB[4] = B[(1+8*m)*lda+4+8*n]; - TempB[5] = B[(1+8*m)*lda+5+8*n]; - TempB[6] = B[(1+8*m)*lda+6+8*n]; - TempB[7] = B[(1+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[1] * TempB[0]; - C[1+8*n+i] += TempA[1] * TempB[1]; - C[2+8*n+i] += TempA[1] * TempB[2]; - C[3+8*n+i] += TempA[1] * TempB[3]; - C[4+8*n+i] += TempA[1] * TempB[4]; - C[5+8*n+i] += TempA[1] * TempB[5]; - C[6+8*n+i] += TempA[1] * TempB[6]; - C[7+8*n+i] += TempA[1] * TempB[7]; - - - - TempB[0] = B[(2+8*m)*lda+0+8*n]; - TempB[1] = B[(2+8*m)*lda+1+8*n]; - TempB[2] = B[(2+8*m)*lda+2+8*n]; - TempB[3] = B[(2+8*m)*lda+3+8*n]; - TempB[4] = B[(2+8*m)*lda+4+8*n]; - TempB[5] = B[(2+8*m)*lda+5+8*n]; - TempB[6] = B[(2+8*m)*lda+6+8*n]; - TempB[7] = B[(2+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[2] * TempB[0]; - C[1+8*n+i] += TempA[2] * TempB[1]; - C[2+8*n+i] += TempA[2] * TempB[2]; - C[3+8*n+i] += TempA[2] * TempB[3]; - C[4+8*n+i] += TempA[2] * TempB[4]; - C[5+8*n+i] += TempA[2] * TempB[5]; - C[6+8*n+i] += TempA[2] * TempB[6]; - C[7+8*n+i] += TempA[2] * TempB[7]; - - - - TempB[0] = B[(3+8*m)*lda+0+8*n]; - TempB[1] = B[(3+8*m)*lda+1+8*n]; - TempB[2] = B[(3+8*m)*lda+2+8*n]; - TempB[3] = B[(3+8*m)*lda+3+8*n]; - TempB[4] = B[(3+8*m)*lda+4+8*n]; - TempB[5] = B[(3+8*m)*lda+5+8*n]; - TempB[6] = B[(3+8*m)*lda+6+8*n]; - TempB[7] = B[(3+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[3] * TempB[0]; - C[1+8*n+i] += TempA[3] * TempB[1]; - C[2+8*n+i] += TempA[3] * TempB[2]; - C[3+8*n+i] += TempA[3] * TempB[3]; - C[4+8*n+i] += TempA[3] * TempB[4]; - C[5+8*n+i] += TempA[3] * TempB[5]; - C[6+8*n+i] += TempA[3] * TempB[6]; - C[7+8*n+i] += TempA[3] * TempB[7]; - - - TempB[0] = B[(4+8*m)*lda+0+8*n]; - TempB[1] = B[(4+8*m)*lda+1+8*n]; - TempB[2] = 
B[(4+8*m)*lda+2+8*n]; - TempB[3] = B[(4+8*m)*lda+3+8*n]; - TempB[4] = B[(4+8*m)*lda+4+8*n]; - TempB[5] = B[(4+8*m)*lda+5+8*n]; - TempB[6] = B[(4+8*m)*lda+6+8*n]; - TempB[7] = B[(4+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[4] * TempB[0]; - C[1+8*n+i] += TempA[4] * TempB[1]; - C[2+8*n+i] += TempA[4] * TempB[2]; - C[3+8*n+i] += TempA[4] * TempB[3]; - C[4+8*n+i] += TempA[4] * TempB[4]; - C[5+8*n+i] += TempA[4] * TempB[5]; - C[6+8*n+i] += TempA[4] * TempB[6]; - C[7+8*n+i] += TempA[4] * TempB[7]; - - - - TempB[0] = B[(5+8*m)*lda+0+8*n]; - TempB[1] = B[(5+8*m)*lda+1+8*n]; - TempB[2] = B[(5+8*m)*lda+2+8*n]; - TempB[3] = B[(5+8*m)*lda+3+8*n]; - TempB[4] = B[(5+8*m)*lda+4+8*n]; - TempB[5] = B[(5+8*m)*lda+5+8*n]; - TempB[6] = B[(5+8*m)*lda+6+8*n]; - TempB[7] = B[(5+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[5] * TempB[0]; - C[1+8*n+i] += TempA[5] * TempB[1]; - C[2+8*n+i] += TempA[5] * TempB[2]; - C[3+8*n+i] += TempA[5] * TempB[3]; - C[4+8*n+i] += TempA[5] * TempB[4]; - C[5+8*n+i] += TempA[5] * TempB[5]; - C[6+8*n+i] += TempA[5] * TempB[6]; - C[7+8*n+i] += TempA[5] * TempB[7]; - - - - TempB[0] = B[(6+8*m)*lda+0+8*n]; - TempB[1] = B[(6+8*m)*lda+1+8*n]; - TempB[2] = B[(6+8*m)*lda+2+8*n]; - TempB[3] = B[(6+8*m)*lda+3+8*n]; - TempB[4] = B[(6+8*m)*lda+4+8*n]; - TempB[5] = B[(6+8*m)*lda+5+8*n]; - TempB[6] = B[(6+8*m)*lda+6+8*n]; - TempB[7] = B[(6+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[6] * TempB[0]; - C[1+8*n+i] += TempA[6] * TempB[1]; - C[2+8*n+i] += TempA[6] * TempB[2]; - C[3+8*n+i] += TempA[6] * TempB[3]; - C[4+8*n+i] += TempA[6] * TempB[4]; - C[5+8*n+i] += TempA[6] * TempB[5]; - C[6+8*n+i] += TempA[6] * TempB[6]; - C[7+8*n+i] += TempA[6] * TempB[7]; - - - TempB[0] = B[(7+8*m)*lda+0+8*n]; - TempB[1] = B[(7+8*m)*lda+1+8*n]; - TempB[2] = B[(7+8*m)*lda+2+8*n]; - TempB[3] = B[(7+8*m)*lda+3+8*n]; - TempB[4] = B[(7+8*m)*lda+4+8*n]; - TempB[5] = B[(7+8*m)*lda+5+8*n]; - TempB[6] = B[(7+8*m)*lda+6+8*n]; - TempB[7] = B[(7+8*m)*lda+7+8*n]; - - C[0+8*n+i] += TempA[7] * TempB[0]; - C[1+8*n+i] 
+= TempA[7] * TempB[1]; - C[2+8*n+i] += TempA[7] * TempB[2]; - C[3+8*n+i] += TempA[7] * TempB[3]; - C[4+8*n+i] += TempA[7] * TempB[4]; - C[5+8*n+i] += TempA[7] * TempB[5]; - C[6+8*n+i] += TempA[7] * TempB[6]; - C[7+8*n+i] += TempA[7] * TempB[7]; - - */ - TempB[0] = B[(0+8*m)*lda+0+8*n]; - TempB[1] = B[(0+8*m)*lda+1+8*n]; - TempB[2] = B[(0+8*m)*lda+2+8*n]; - TempB[3] = B[(0+8*m)*lda+3+8*n]; - TempB[4] = B[(0+8*m)*lda+4+8*n]; - TempB[5] = B[(0+8*m)*lda+5+8*n]; - TempB[6] = B[(0+8*m)*lda+6+8*n]; - TempB[7] = B[(0+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[0] * TempB[0]; - C[1+8*n+j*lda] += TempA[0] * TempB[1]; - C[2+8*n+j*lda] += TempA[0] * TempB[2]; - C[3+8*n+j*lda] += TempA[0] * TempB[3]; - C[4+8*n+j*lda] += TempA[0] * TempB[4]; - C[5+8*n+j*lda] += TempA[0] * TempB[5]; - C[6+8*n+j*lda] += TempA[0] * TempB[6]; - C[7+8*n+j*lda] += TempA[0] * TempB[7]; - - - - TempB[0] = B[(1+8*m)*lda+0+8*n]; - TempB[1] = B[(1+8*m)*lda+1+8*n]; - TempB[2] = B[(1+8*m)*lda+2+8*n]; - TempB[3] = B[(1+8*m)*lda+3+8*n]; - TempB[4] = B[(1+8*m)*lda+4+8*n]; - TempB[5] = B[(1+8*m)*lda+5+8*n]; - TempB[6] = B[(1+8*m)*lda+6+8*n]; - TempB[7] = B[(1+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[1] * TempB[0]; - C[1+8*n+j*lda] += TempA[1] * TempB[1]; - C[2+8*n+j*lda] += TempA[1] * TempB[2]; - C[3+8*n+j*lda] += TempA[1] * TempB[3]; - C[4+8*n+j*lda] += TempA[1] * TempB[4]; - C[5+8*n+j*lda] += TempA[1] * TempB[5]; - C[6+8*n+j*lda] += TempA[1] * TempB[6]; - C[7+8*n+j*lda] += TempA[1] * TempB[7]; - - - - TempB[0] = B[(2+8*m)*lda+0+8*n]; - TempB[1] = B[(2+8*m)*lda+1+8*n]; - TempB[2] = B[(2+8*m)*lda+2+8*n]; - TempB[3] = B[(2+8*m)*lda+3+8*n]; - TempB[4] = B[(2+8*m)*lda+4+8*n]; - TempB[5] = B[(2+8*m)*lda+5+8*n]; - TempB[6] = B[(2+8*m)*lda+6+8*n]; - TempB[7] = B[(2+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[2] * TempB[0]; - C[1+8*n+j*lda] += TempA[2] * TempB[1]; - C[2+8*n+j*lda] += TempA[2] * TempB[2]; - C[3+8*n+j*lda] += TempA[2] * TempB[3]; - C[4+8*n+j*lda] += TempA[2] * TempB[4]; - C[5+8*n+j*lda] += 
TempA[2] * TempB[5]; - C[6+8*n+j*lda] += TempA[2] * TempB[6]; - C[7+8*n+j*lda] += TempA[2] * TempB[7]; - - - - TempB[0] = B[(3+8*m)*lda+0+8*n]; - TempB[1] = B[(3+8*m)*lda+1+8*n]; - TempB[2] = B[(3+8*m)*lda+2+8*n]; - TempB[3] = B[(3+8*m)*lda+3+8*n]; - TempB[4] = B[(3+8*m)*lda+4+8*n]; - TempB[5] = B[(3+8*m)*lda+5+8*n]; - TempB[6] = B[(3+8*m)*lda+6+8*n]; - TempB[7] = B[(3+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[3] * TempB[0]; - C[1+8*n+j*lda] += TempA[3] * TempB[1]; - C[2+8*n+j*lda] += TempA[3] * TempB[2]; - C[3+8*n+j*lda] += TempA[3] * TempB[3]; - C[4+8*n+j*lda] += TempA[3] * TempB[4]; - C[5+8*n+j*lda] += TempA[3] * TempB[5]; - C[6+8*n+j*lda] += TempA[3] * TempB[6]; - C[7+8*n+j*lda] += TempA[3] * TempB[7]; - - - TempB[0] = B[(4+8*m)*lda+0+8*n]; - TempB[1] = B[(4+8*m)*lda+1+8*n]; - TempB[2] = B[(4+8*m)*lda+2+8*n]; - TempB[3] = B[(4+8*m)*lda+3+8*n]; - TempB[4] = B[(4+8*m)*lda+4+8*n]; - TempB[5] = B[(4+8*m)*lda+5+8*n]; - TempB[6] = B[(4+8*m)*lda+6+8*n]; - TempB[7] = B[(4+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[4] * TempB[0]; - C[1+8*n+j*lda] += TempA[4] * TempB[1]; - C[2+8*n+j*lda] += TempA[4] * TempB[2]; - C[3+8*n+j*lda] += TempA[4] * TempB[3]; - C[4+8*n+j*lda] += TempA[4] * TempB[4]; - C[5+8*n+j*lda] += TempA[4] * TempB[5]; - C[6+8*n+j*lda] += TempA[4] * TempB[6]; - C[7+8*n+j*lda] += TempA[4] * TempB[7]; - - - - TempB[0] = B[(5+8*m)*lda+0+8*n]; - TempB[1] = B[(5+8*m)*lda+1+8*n]; - TempB[2] = B[(5+8*m)*lda+2+8*n]; - TempB[3] = B[(5+8*m)*lda+3+8*n]; - TempB[4] = B[(5+8*m)*lda+4+8*n]; - TempB[5] = B[(5+8*m)*lda+5+8*n]; - TempB[6] = B[(5+8*m)*lda+6+8*n]; - TempB[7] = B[(5+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[5] * TempB[0]; - C[1+8*n+j*lda] += TempA[5] * TempB[1]; - C[2+8*n+j*lda] += TempA[5] * TempB[2]; - C[3+8*n+j*lda] += TempA[5] * TempB[3]; - C[4+8*n+j*lda] += TempA[5] * TempB[4]; - C[5+8*n+j*lda] += TempA[5] * TempB[5]; - C[6+8*n+j*lda] += TempA[5] * TempB[6]; - C[7+8*n+j*lda] += TempA[5] * TempB[7]; - - - - TempB[0] = B[(6+8*m)*lda+0+8*n]; - 
TempB[1] = B[(6+8*m)*lda+1+8*n]; - TempB[2] = B[(6+8*m)*lda+2+8*n]; - TempB[3] = B[(6+8*m)*lda+3+8*n]; - TempB[4] = B[(6+8*m)*lda+4+8*n]; - TempB[5] = B[(6+8*m)*lda+5+8*n]; - TempB[6] = B[(6+8*m)*lda+6+8*n]; - TempB[7] = B[(6+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[6] * TempB[0]; - C[1+8*n+j*lda] += TempA[6] * TempB[1]; - C[2+8*n+j*lda] += TempA[6] * TempB[2]; - C[3+8*n+j*lda] += TempA[6] * TempB[3]; - C[4+8*n+j*lda] += TempA[6] * TempB[4]; - C[5+8*n+j*lda] += TempA[6] * TempB[5]; - C[6+8*n+j*lda] += TempA[6] * TempB[6]; - C[7+8*n+j*lda] += TempA[6] * TempB[7]; - - - TempB[0] = B[(7+8*m)*lda+0+8*n]; - TempB[1] = B[(7+8*m)*lda+1+8*n]; - TempB[2] = B[(7+8*m)*lda+2+8*n]; - TempB[3] = B[(7+8*m)*lda+3+8*n]; - TempB[4] = B[(7+8*m)*lda+4+8*n]; - TempB[5] = B[(7+8*m)*lda+5+8*n]; - TempB[6] = B[(7+8*m)*lda+6+8*n]; - TempB[7] = B[(7+8*m)*lda+7+8*n]; - - C[0+8*n+j*lda] += TempA[7] * TempB[0]; - C[1+8*n+j*lda] += TempA[7] * TempB[1]; - C[2+8*n+j*lda] += TempA[7] * TempB[2]; - C[3+8*n+j*lda] += TempA[7] * TempB[3]; - C[4+8*n+j*lda] += TempA[7] * TempB[4]; - C[5+8*n+j*lda] += TempA[7] * TempB[5]; - C[6+8*n+j*lda] += TempA[7] * TempB[6]; - C[7+8*n+j*lda] += TempA[7] * TempB[7]; - } - - } - } - } - - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - /* - - // Execute the provided, naive matmul - barrier(nc); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - - // clear results from the first trial - size_t i; - if (coreid == 0) - for (i=0; i < ARRAY_SIZE; i++) - results_data[i] = 0; - barrier(nc); - - */ - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/av_vvadd/av_vvadd.c b/mt/av_vvadd/av_vvadd.c deleted file mode 100644 index 11202c7..0000000 --- a/mt/av_vvadd/av_vvadd.c +++ /dev/null @@ -1,196 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // 
***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - - size_t i; - if(coreid ==0) - { - for (i = coreid; i < n-3; i+=4) - { - x[i] = x[i] + y[i]; - x[i+1] = x[i+1] + y[i+1]; - } - i = i + 4; - - - for (i; i < (n+1); i+=1) - { - x[i] = x[i] + y[i]; - } - - - } - if(coreid ==1) - { - for (i = 2; i < n; i+=4) - { - x[i] = x[i] + y[i]; - x[i+1] = x[i+1] + y[i+1]; - - } - - - } - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/av_vvadd/dataset.h b/mt/av_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/av_vvadd/dataset.h +++ /dev/null @@ 
-1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 
4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 
10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 
4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 
4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 
2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 
1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 
6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, 
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 
30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 
18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/av_vvadd/vvadd_gendata.pl b/mt/av_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/av_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t 
".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." 
\n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/ay_matmul.c b/mt/ay_matmul.c new file mode 100644 index 0000000..45fb194 --- /dev/null +++ b/mt/ay_matmul.c @@ -0,0 +1,60 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + if(coreid > 1) return; + static __thread int i, j, k; + static __thread data_t tempA0, tempA1, tempA2, tempA3, tempA4, tempA5, tempA6, tempA7; + static __thread data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7, tempC8, tempC9, tempC10, tempC11, tempC12, tempC13, tempC14, tempC15; + + static __thread int start, end, jStride, jToRow, jToCol; + + start = coreid << 9; + end = ((ncores == 1) ? 2 : (coreid+1) ) << 9; + jStride = 8; + + for (j=start; j < end; j+=jStride) { + jToRow = (j>>5)<<5; + jToCol = j%32; + tempC0 = 0; + tempC1 = 0; + tempC2 = 0; + tempC3 = 0; + tempC4 = 0; + tempC5 = 0; + tempC6 = 0; + tempC7 = 0; + for ( i=0; i < lda; i+=2 ) { + tempA0 = A[i + jToRow]; + tempA1 = A[i+1 + jToRow]; + tempC0 += tempA0 * B[(jToCol ) + (i<<5)]; + tempC1 += tempA0 * B[(jToCol+1 ) + (i<<5)]; + tempC2 += tempA0 * B[(jToCol+2 ) + (i<<5)]; + tempC3 += tempA0 * B[(jToCol+3 ) + (i<<5)]; + tempC4 += tempA0 * B[(jToCol+4 ) + (i<<5)]; + tempC5 += tempA0 * B[(jToCol+5 ) + (i<<5)]; + tempC6 += tempA0 * B[(jToCol+6 ) + (i<<5)]; + tempC7 += tempA0 * B[(jToCol+7 ) + (i<<5)]; + tempC0 += tempA1 * B[(jToCol ) + ((i+1)<<5)]; + tempC1 += tempA1 * B[(jToCol+1 ) + ((i+1)<<5)]; + tempC2 += tempA1 * B[(jToCol+2 ) + ((i+1)<<5)]; + tempC3 += tempA1 * B[(jToCol+3 ) + ((i+1)<<5)]; + tempC4 += tempA1 * B[(jToCol+4 ) + ((i+1)<<5)]; + tempC5 += tempA1 * B[(jToCol+5 ) + ((i+1)<<5)]; + tempC6 += tempA1 * B[(jToCol+6 ) + ((i+1)<<5)]; + tempC7 += tempA1 * B[(jToCol+7 ) + ((i+1)<<5)]; + } + 
C[j] =tempC0; + C[j + 1 ]=tempC1; + C[j + 2 ]=tempC2; + C[j + 3 ]=tempC3; + C[j + 4 ]=tempC4; + C[j + 5 ]=tempC5; + C[j + 6 ]=tempC6; + C[j + 7 ]=tempC7; + } + +} diff --git a/mt/ay_matmul/.matmul.c.swp b/mt/ay_matmul/.matmul.c.swp deleted file mode 100644 index 9ebeb79..0000000 Binary files a/mt/ay_matmul/.matmul.c.swp and /dev/null differ diff --git a/mt/ay_matmul/ay_matmul.c b/mt/ay_matmul/ay_matmul.c deleted file mode 100644 index 857a78f..0000000 --- a/mt/ay_matmul/ay_matmul.c +++ /dev/null @@ -1,210 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda 
+ i]; - } - } - -} - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - static __thread int i, j, k; - static __thread data_t tempA0, tempA1, tempA2, tempA3, tempA4, tempA5, tempA6, tempA7; - static __thread data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7, tempC8, tempC9, tempC10, tempC11, tempC12, tempC13, tempC14, tempC15; - - static __thread int start, end, jStride, jToRow, jToCol; - - start = coreid << 9; - end = (coreid+1) << 9; - jStride = 8; - - for (j=start; j < end; j+=jStride) { - jToRow = (j>>5)<<5; - jToCol = j%32; - tempC0 = 0; - tempC1 = 0; - tempC2 = 0; - tempC3 = 0; - tempC4 = 0; - tempC5 = 0; - tempC6 = 0; - tempC7 = 0; - for ( i=0; i < lda; i+=2 ) { - tempA0 = A[i + jToRow]; - tempA1 = A[i+1 + jToRow]; - tempC0 += tempA0 * B[(jToCol ) + (i<<5)]; - tempC1 += tempA0 * B[(jToCol+1 ) + (i<<5)]; - tempC2 += tempA0 * B[(jToCol+2 ) + (i<<5)]; - tempC3 += tempA0 * B[(jToCol+3 ) + (i<<5)]; - tempC4 += tempA0 * B[(jToCol+4 ) + (i<<5)]; - tempC5 += tempA0 * B[(jToCol+5 ) + (i<<5)]; - tempC6 += tempA0 * B[(jToCol+6 ) + (i<<5)]; - tempC7 += tempA0 * B[(jToCol+7 ) + (i<<5)]; - tempC0 += tempA1 * B[(jToCol ) + ((i+1)<<5)]; - tempC1 += tempA1 * B[(jToCol+1 ) + ((i+1)<<5)]; - tempC2 += tempA1 * B[(jToCol+2 ) + ((i+1)<<5)]; - tempC3 += tempA1 * B[(jToCol+3 ) + ((i+1)<<5)]; - tempC4 += tempA1 * B[(jToCol+4 ) + ((i+1)<<5)]; - tempC5 += tempA1 * B[(jToCol+5 ) + ((i+1)<<5)]; - tempC6 += tempA1 * B[(jToCol+6 ) + ((i+1)<<5)]; - tempC7 += tempA1 * B[(jToCol+7 ) + ((i+1)<<5)]; - } - C[j] =tempC0; - C[j + 1 ]=tempC1; - C[j + 2 ]=tempC2; - C[j + 3 ]=tempC3; - C[j + 4 ]=tempC4; - C[j + 5 ]=tempC5; - C[j + 6 ]=tempC6; - C[j + 7 ]=tempC7; - } - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - - //// Execute the provided, naive matmul - //barrier(nc); - //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - // - //// verify - //verifyMT(ARRAY_SIZE, results_data, verify_data); - // - //// clear results from the first trial - //size_t i; - //if (coreid == 0) - // for (i=0; i < ARRAY_SIZE; i++) - // results_data[i] = 0; - //barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - - diff --git a/mt/ay_matmul/dataset.h b/mt/ay_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/ay_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 
2, 1, 3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, 
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 
3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 
69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 
77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/ay_matmul/matmul_gendata.pl b/mt/ay_matmul/matmul_gendata.pl deleted file mode 
100755 index f21bb46..0000000 --- a/mt/ay_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print 
sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/ay_matmul/matmul_mi.c b/mt/ay_matmul/matmul_mi.c deleted file mode 100644 index 1a42e83..0000000 --- a/mt/ay_matmul/matmul_mi.c +++ /dev/null @@ -1,258 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for 
( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - static __thread int i, j, k; - static __thread data_t tempA0, tempA1, tempA2, tempA3, tempA4, tempA5, tempA6, tempA7; - static __thread data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7, tempC8, tempC9, tempC10, tempC11, tempC12, tempC13, tempC14, tempC15; - - static __thread int start, end, jStride, jToRow, jToCol; - static data_t A1[1024], B1[1024];; - - start = coreid << 9; - end = (coreid+1) << 9; - jStride = 8; - - if (coreid == 0) { - for (j=start; j < end; j+=jStride) { - jToRow = (j>>5)<<5; - jToCol = j%32; - tempC0 = 0; - tempC1 = 0; - tempC2 = 0; - tempC3 = 0; - tempC4 = 0; - tempC5 = 0; - tempC6 = 0; - tempC7 = 0; - for ( i=0; i < lda; i+=2 ) { - tempA0 = A[i + jToRow]; - tempA1 = A[i+1 + jToRow]; - tempC0 += tempA0 * B[(jToCol ) + (i<<5)]; - tempC1 += tempA0 * B[(jToCol+1 ) + (i<<5)]; - tempC2 += tempA0 * B[(jToCol+2 ) + (i<<5)]; - tempC3 += tempA0 * B[(jToCol+3 ) + (i<<5)]; - tempC4 += tempA0 * B[(jToCol+4 ) + (i<<5)]; - tempC5 += tempA0 * B[(jToCol+5 ) + (i<<5)]; - tempC6 
+= tempA0 * B[(jToCol+6 ) + (i<<5)]; - tempC7 += tempA0 * B[(jToCol+7 ) + (i<<5)]; - tempC0 += tempA1 * B[(jToCol ) + ((i+1)<<5)]; - tempC1 += tempA1 * B[(jToCol+1 ) + ((i+1)<<5)]; - tempC2 += tempA1 * B[(jToCol+2 ) + ((i+1)<<5)]; - tempC3 += tempA1 * B[(jToCol+3 ) + ((i+1)<<5)]; - tempC4 += tempA1 * B[(jToCol+4 ) + ((i+1)<<5)]; - tempC5 += tempA1 * B[(jToCol+5 ) + ((i+1)<<5)]; - tempC6 += tempA1 * B[(jToCol+6 ) + ((i+1)<<5)]; - tempC7 += tempA1 * B[(jToCol+7 ) + ((i+1)<<5)]; - } - C[j] =tempC0; - C[j + 1 ]=tempC1; - C[j + 2 ]=tempC2; - C[j + 3 ]=tempC3; - C[j + 4 ]=tempC4; - C[j + 5 ]=tempC5; - C[j + 6 ]=tempC6; - C[j + 7 ]=tempC7; - } - } - else { - for (i = 0; i < 1024; i++) { - A1[i] = A[i]; - B1[i] = B[i]; - } - for (j=start; j < end; j+=jStride) { - jToRow = (j>>5)<<5; - jToCol = j%32; - tempC0 = 0; - tempC1 = 0; - tempC2 = 0; - tempC3 = 0; - tempC4 = 0; - tempC5 = 0; - tempC6 = 0; - tempC7 = 0; - for ( i=0; i < lda; i+=2 ) { - tempA0 = A1[i + jToRow]; - tempA1 = A1[i+1 + jToRow]; - tempC0 += tempA0 * B1[(jToCol ) + (i<<5)]; - tempC1 += tempA0 * B1[(jToCol+1 ) + (i<<5)]; - tempC2 += tempA0 * B1[(jToCol+2 ) + (i<<5)]; - tempC3 += tempA0 * B1[(jToCol+3 ) + (i<<5)]; - tempC4 += tempA0 * B1[(jToCol+4 ) + (i<<5)]; - tempC5 += tempA0 * B1[(jToCol+5 ) + (i<<5)]; - tempC6 += tempA0 * B1[(jToCol+6 ) + (i<<5)]; - tempC7 += tempA0 * B1[(jToCol+7 ) + (i<<5)]; - tempC0 += tempA1 * B1[(jToCol ) + ((i+1)<<5)]; - tempC1 += tempA1 * B1[(jToCol+1 ) + ((i+1)<<5)]; - tempC2 += tempA1 * B1[(jToCol+2 ) + ((i+1)<<5)]; - tempC3 += tempA1 * B1[(jToCol+3 ) + ((i+1)<<5)]; - tempC4 += tempA1 * B1[(jToCol+4 ) + ((i+1)<<5)]; - tempC5 += tempA1 * B1[(jToCol+5 ) + ((i+1)<<5)]; - tempC6 += tempA1 * B1[(jToCol+6 ) + ((i+1)<<5)]; - tempC7 += tempA1 * B1[(jToCol+7 ) + ((i+1)<<5)]; - } - C[j] =tempC0; - C[j + 1 ]=tempC1; - C[j + 2 ]=tempC2; - C[j + 3 ]=tempC3; - C[j + 4 ]=tempC4; - C[j + 5 ]=tempC5; - C[j + 6 ]=tempC6; - C[j + 7 ]=tempC7; - } - } -} - 
-//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - - //// Execute the provided, naive matmul - //barrier(nc); - //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - // - //// verify - //verifyMT(ARRAY_SIZE, results_data, verify_data); - // - //// clear results from the first trial - //size_t i; - //if (coreid == 0) - // for (i=0; i < ARRAY_SIZE; i++) - // results_data[i] = 0; - //barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - - diff --git a/mt/ay_vvadd/ay_vvadd.c b/mt/ay_vvadd/ay_vvadd.c deleted file mode 100755 index 2bf8da9..0000000 --- a/mt/ay_vvadd/ay_vvadd.c +++ /dev/null @@ -1,175 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - 
- // Each core uses its own block - if (coreid == 0) { - for (i = 0; i < (n/2); i++) { - x[i] = x[i] + y[i]; - } - } - else { - for (i = (n/2); i < n; i++) { - x[i] = x[i] + y[i]; - } - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ay_vvadd/dataset.h b/mt/ay_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/ay_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 
6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 
9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 
5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 
16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 
18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 
5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 
9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 
12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 
13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 
24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 
19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/ay_vvadd/vvadd_gendata.pl b/mt/ay_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/ay_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = 
int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." 
\n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/az_matmul.c b/mt/az_matmul.c new file mode 100755 index 0000000..1668fb0 --- /dev/null +++ b/mt/az_matmul.c @@ -0,0 +1,181 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + if(coreid > 1) return; + static __thread int i, j, k; + static __thread data_t tempA0, tempA1, tempA2, tempA3, tempA4, tempA5, tempA6, tempA7; + static __thread data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7; //tempC8, tempC9, tempC10, tempC11, tempC12, tempC13, tempC14, tempC15; + + static __thread int start, end, jStride, jToRow, jToCol, iToRow; + + start = coreid << 9; + end = ((ncores == 1) ? 2 : (coreid+1)) << 9; + jStride = 8; + + for (j=start; j < end; j+=jStride) { + jToRow = (j>>5)<<5; + jToCol = j%32; + tempC0 = 0; + tempC1 = 0; + tempC2 = 0; + tempC3 = 0; + tempC4 = 0; + tempC5 = 0; + tempC6 = 0; + tempC7 = 0; + //tempC8 = 0; + //tempC9 = 0; + //tempC10 = 0; + //tempC11 = 0; + //tempC12 = 0; + //tempC13 = 0; + //tempC14 = 0; + //tempC15 = 0; + + for ( i=0; i < lda; i+=2 ) { + iToRow = i << 5; + + tempA0 = A[i + jToRow]; + tempA1 = A[i+1 + jToRow]; + //tempA2 = A[i+2 + jToRow]; + //tempA3 = A[i+3 + jToRow]; + //tempA4 = A[i+4 + jToRow]; + //tempA5 = A[i+5 + jToRow]; + //tempA6 = A[i+6 + jToRow]; + //tempA7 = A[i+7 + jToRow]; + + tempC0 += tempA0 * B[(jToCol ) + (iToRow)]; + tempC1 += tempA0 * B[(jToCol+1 ) + (iToRow)]; + tempC2 += tempA0 * B[(jToCol+2 ) + (iToRow)]; + tempC3 += tempA0 * B[(jToCol+3 ) + (iToRow)]; + tempC4 += tempA0 * B[(jToCol+4 ) + (iToRow)]; + tempC5 += tempA0 * B[(jToCol+5 ) + (iToRow)]; + tempC6 += tempA0 * B[(jToCol+6 ) + (iToRow)]; + tempC7 += tempA0 * B[(jToCol+7 ) + (iToRow)]; + //tempC8 += tempA0 * B[(jToCol+8 ) + 
(iToRow)]; + //tempC9 += tempA0 * B[(jToCol+9 ) + (iToRow)]; + //tempC10 += tempA0 * B[(jToCol+10) + (iToRow)]; + //tempC11 += tempA0 * B[(jToCol+11) + (iToRow)]; + //tempC12 += tempA0 * B[(jToCol+12) + (iToRow)]; + //tempC13 += tempA0 * B[(jToCol+13) + (iToRow)]; + //tempC14 += tempA0 * B[(jToCol+14) + (iToRow)]; + //tempC15 += tempA0 * B[(jToCol+15) + (iToRow)]; + + iToRow += 32; + tempC0 += tempA1 * B[(jToCol ) + (iToRow)]; + tempC1 += tempA1 * B[(jToCol+1 ) + (iToRow)]; + tempC2 += tempA1 * B[(jToCol+2 ) + (iToRow)]; + tempC3 += tempA1 * B[(jToCol+3 ) + (iToRow)]; + tempC4 += tempA1 * B[(jToCol+4 ) + (iToRow)]; + tempC5 += tempA1 * B[(jToCol+5 ) + (iToRow)]; + tempC6 += tempA1 * B[(jToCol+6 ) + (iToRow)]; + tempC7 += tempA1 * B[(jToCol+7 ) + (iToRow)]; + //tempC8 += tempA1 * B[(jToCol+8 ) + (iToRow+32)]; + //tempC9 += tempA1 * B[(jToCol+9 ) + (iToRow+32)]; + //tempC10 += tempA1 * B[(jToCol+10) + (iToRow+32)]; + //tempC11 += tempA1 * B[(jToCol+11) + (iToRow+32)]; + //tempC12 += tempA1 * B[(jToCol+12) + (iToRow+32)]; + //tempC13 += tempA1 * B[(jToCol+13) + (iToRow+32)]; + //tempC14 += tempA1 * B[(jToCol+14) + (iToRow+32)]; + //tempC15 += tempA1 * B[(jToCol+15) + (iToRow+32)]; + + //iToRow += 32; + //tempC0 += tempA2 * B[(jToCol ) + (iToRow)]; + //tempC1 += tempA2 * B[(jToCol+1 ) + (iToRow)]; + //tempC2 += tempA2 * B[(jToCol+2 ) + (iToRow)]; + //tempC3 += tempA2 * B[(jToCol+3 ) + (iToRow)]; + //tempC4 += tempA2 * B[(jToCol+4 ) + (iToRow)]; + //tempC5 += tempA2 * B[(jToCol+5 ) + (iToRow)]; + //tempC6 += tempA2 * B[(jToCol+6 ) + (iToRow)]; + //tempC7 += tempA2 * B[(jToCol+7 ) + (iToRow)]; + //tempC8 += tempA2 * B[(jToCol+8 ) + (iToRow)]; + //tempC9 += tempA2 * B[(jToCol+9 ) + (iToRow)]; + //tempC10 += tempA2 * B[(jToCol+10) + (iToRow)]; + //tempC11 += tempA2 * B[(jToCol+11) + (iToRow)]; + //tempC12 += tempA2 * B[(jToCol+12) + (iToRow)]; + //tempC13 += tempA2 * B[(jToCol+13) + (iToRow)]; + //tempC14 += tempA2 * B[(jToCol+14) + (iToRow)]; + //tempC15 += tempA2 * 
B[(jToCol+15) + (iToRow)]; + + //iToRow += 32; + //tempC0 += tempA3 * B[(jToCol ) + (iToRow)]; + //tempC1 += tempA3 * B[(jToCol+1 ) + (iToRow)]; + //tempC2 += tempA3 * B[(jToCol+2 ) + (iToRow)]; + //tempC3 += tempA3 * B[(jToCol+3 ) + (iToRow)]; + //tempC4 += tempA3 * B[(jToCol+4 ) + (iToRow)]; + //tempC5 += tempA3 * B[(jToCol+5 ) + (iToRow)]; + //tempC6 += tempA3 * B[(jToCol+6 ) + (iToRow)]; + //tempC7 += tempA3 * B[(jToCol+7 ) + (iToRow)]; + //tempC8 += tempA3 * B[(jToCol+8 ) + (iToRow)]; + //tempC9 += tempA3 * B[(jToCol+9 ) + (iToRow)]; + //tempC10 += tempA3 * B[(jToCol+10) + (iToRow)]; + //tempC11 += tempA3 * B[(jToCol+11) + (iToRow)]; + //tempC12 += tempA3 * B[(jToCol+12) + (iToRow)]; + //tempC13 += tempA3 * B[(jToCol+13) + (iToRow)]; + //tempC14 += tempA3 * B[(jToCol+14) + (iToRow)]; + //tempC15 += tempA3 * B[(jToCol+15) + (iToRow)]; + + //iToRow += 32; + //tempC0 += tempA4 * B[(jToCol ) + (iToRow)]; + //tempC1 += tempA4 * B[(jToCol+1 ) + (iToRow)]; + //tempC2 += tempA4 * B[(jToCol+2 ) + (iToRow)]; + //tempC3 += tempA4 * B[(jToCol+3 ) + (iToRow)]; + //tempC4 += tempA4 * B[(jToCol+4 ) + (iToRow)]; + //tempC5 += tempA4 * B[(jToCol+5 ) + (iToRow)]; + //tempC6 += tempA4 * B[(jToCol+6 ) + (iToRow)]; + //tempC7 += tempA4 * B[(jToCol+7 ) + (iToRow)]; + // + //iToRow += 32; + //tempC0 += tempA5 * B[(jToCol ) + (iToRow)]; + //tempC1 += tempA5 * B[(jToCol+1 ) + (iToRow)]; + //tempC2 += tempA5 * B[(jToCol+2 ) + (iToRow)]; + //tempC3 += tempA5 * B[(jToCol+3 ) + (iToRow)]; + //tempC4 += tempA5 * B[(jToCol+4 ) + (iToRow)]; + //tempC5 += tempA5 * B[(jToCol+5 ) + (iToRow)]; + //tempC6 += tempA5 * B[(jToCol+6 ) + (iToRow)]; + //tempC7 += tempA5 * B[(jToCol+7 ) + (iToRow)]; + // + //iToRow += 32; + //tempC0 += tempA6 * B[(jToCol ) + (iToRow)]; + //tempC1 += tempA6 * B[(jToCol+1 ) + (iToRow)]; + //tempC2 += tempA6 * B[(jToCol+2 ) + (iToRow)]; + //tempC3 += tempA6 * B[(jToCol+3 ) + (iToRow)]; + //tempC4 += tempA6 * B[(jToCol+4 ) + (iToRow)]; + //tempC5 += tempA6 * B[(jToCol+5 ) + 
(iToRow)]; + //tempC6 += tempA6 * B[(jToCol+6 ) + (iToRow)]; + //tempC7 += tempA6 * B[(jToCol+7 ) + (iToRow)]; + // + //iToRow += 32; + //tempC0 += tempA7 * B[(jToCol ) + (iToRow)]; + //tempC1 += tempA7 * B[(jToCol+1 ) + (iToRow)]; + //tempC2 += tempA7 * B[(jToCol+2 ) + (iToRow)]; + //tempC3 += tempA7 * B[(jToCol+3 ) + (iToRow)]; + //tempC4 += tempA7 * B[(jToCol+4 ) + (iToRow)]; + //tempC5 += tempA7 * B[(jToCol+5 ) + (iToRow)]; + //tempC6 += tempA7 * B[(jToCol+6 ) + (iToRow)]; + //tempC7 += tempA7 * B[(jToCol+7 ) + (iToRow)]; + + } + C[j ] = tempC0; + C[j + 1 ] = tempC1; + C[j + 2 ] = tempC2; + C[j + 3 ] = tempC3; + C[j + 4 ] = tempC4; + C[j + 5 ] = tempC5; + C[j + 6 ] = tempC6; + C[j + 7 ] = tempC7; + //C[j + 8 ] = tempC8 ; + //C[j + 9 ] = tempC9 ; + //C[j + 10] = tempC10; + //C[j + 11] = tempC11; + //C[j + 12] = tempC12; + //C[j + 13] = tempC13; + //C[j + 14] = tempC14; + //C[j + 15] = tempC15; + } +} diff --git a/mt/az_matmul/.matmul.c.swp b/mt/az_matmul/.matmul.c.swp deleted file mode 100644 index f9021cb..0000000 Binary files a/mt/az_matmul/.matmul.c.swp and /dev/null differ diff --git a/mt/az_matmul/az_matmul.c b/mt/az_matmul/az_matmul.c deleted file mode 100755 index 5bfd15f..0000000 --- a/mt/az_matmul/az_matmul.c +++ /dev/null @@ -1,416 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda 
+ i]; - } - } - -} - - -data_t ffmul(data_t a, data_t b) { - data_t result = 0; - - for (int i=0; i < b; i++) { - result += a; - } - - return result; -} - - -//void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -//{ -// -// // ***************************** // -// // **** ADD YOUR CODE HERE ***** // -// // ***************************** // -// // -// // feel free to make a separate function for MI and MSI versions. -// -// static __thread int i, j, k; -// static __thread int jlda, ilda; -// static __thread data_t tempA1, tempA2, tempA3, tempA4, tempA5, tempA6, tempA7, tempA8; -// static __thread int start, end; -// -// start = coreid*(lda>>1); -// end = (coreid+1)*(lda>>1); -// -// for (j=start; j < end; j+=1) { -// jlda = j * lda; -// for ( i=0; i < lda; i+=1 ) { -// ilda = i*lda; -// tempA1 = A[i + jlda]; -// //tempA2 = A[i+1 + jlda]; -// //tempA3 = A[i+2 + jlda]; -// //tempA4 = A[i+3 + jlda]; -// //tempA5 = A[i+4 + jlda]; -// //tempA6 = A[i+5 + jlda]; -// //tempA7 = A[i+6 + jlda]; -// //tempA8 = A[i+7 + jlda]; -// //tempC1 = C[i + j*lda]; -// //tempC2 = C[i+1 + j*lda]; -// for(k=0; k < lda; k+=1) { -// //C[k + jlda] += tempA1 * B[k + i*lda] + tempA2 * B[k + (i+1)*lda] + tempA3 * B[k + (i+2)*lda] + tempA4 * B[k + (i+3)*lda] + -// // tempA5 * B[k + (i+4)*lda] + tempA6 * B[k + (i+5)*lda] + tempA7 * B[k + (i+6)*lda] + tempA8 * B[k + (i+7)*lda]; -// -// C[k + jlda] += tempA1* B[k + i*lda];// + ffmul(tempA2,B[k + (i+1)*lda]) + tempA3 * B[k + (i+2)*lda] + tempA4 * B[k + (i+3)*lda] + -// // tempA5 * B[k + (i+4)*lda] + tempA6 * B[k + (i+5)*lda] + tempA7 * B[k + (i+6)*lda] + tempA8 * B[k + (i+7)*lda]; -// // -// //C[k+1 + jlda] += tempA1 * B[k+1 + i*lda] + tempA2 * B[k+1 + (i+1)*lda] + tempA3 * B[k+1 + (i+2)*lda] + tempA4 * B[k+1 + (i+3)*lda] + -// // tempA5 * B[k+1 + (i+4)*lda] + tempA6 * B[k+1 + (i+5)*lda] + tempA7 * B[k+1 + (i+6)*lda] + tempA8 * B[k+1 + (i+7)*lda]; -// // -// //C[k+2 + jlda] += tempA1 * B[k+2 + i*lda] + 
tempA2 * B[k+2 + (i+1)*lda] + tempA3 * B[k+2 + (i+2)*lda] + tempA4 * B[k+2 + (i+3)*lda] + -// // tempA5 * B[k+2 + (i+4)*lda] + tempA6 * B[k+2 + (i+5)*lda] + tempA7 * B[k+2 + (i+6)*lda] + tempA8 * B[k+2 + (i+7)*lda]; -// // -// //C[k+3 + jlda] += tempA1 * B[k+3 + i*lda] + tempA2 * B[k+3 + (i+1)*lda] + tempA3 * B[k+3 + (i+2)*lda] + tempA4 * B[k+3 + (i+3)*lda] + -// // tempA5 * B[k+3 + (i+4)*lda] + tempA6 * B[k+3 + (i+5)*lda] + tempA7 * B[k+3 + (i+6)*lda] + tempA8 * B[k+3 + (i+7)*lda]; -// // -// //C[k+4 + jlda] += tempA1 * B[k+4 + i*lda] + tempA2 * B[k+4 + (i+1)*lda] + tempA3 * B[k+4 + (i+2)*lda] + tempA4 * B[k+4 + (i+3)*lda] + -// // tempA5 * B[k+4 + (i+4)*lda] + tempA6 * B[k+4 + (i+5)*lda] + tempA7 * B[k+4 + (i+6)*lda] + tempA8 * B[k+4 + (i+7)*lda]; -// // -// //C[k+5 + jlda] += tempA1 * B[k+5 + i*lda] + tempA2 * B[k+5 + (i+1)*lda] + tempA3 * B[k+5 + (i+2)*lda] + tempA4 * B[k+5 + (i+3)*lda] + -// // tempA5 * B[k+5 + (i+4)*lda] + tempA6 * B[k+5 + (i+5)*lda] + tempA7 * B[k+5 + (i+6)*lda] + tempA8 * B[k+5 + (i+7)*lda]; -// // -// //C[k+6 + jlda] += tempA1 * B[k+6 + i*lda] + tempA2 * B[k+6 + (i+1)*lda] + tempA3 * B[k+6 + (i+2)*lda] + tempA4 * B[k+6 + (i+3)*lda] + -// // tempA5 * B[k+6 + (i+4)*lda] + tempA6 * B[k+6 + (i+5)*lda] + tempA7 * B[k+6 + (i+6)*lda] + tempA8 * B[k+6 + (i+7)*lda]; -// // -// //C[k+7 + jlda] += tempA1 * B[k+7 + i*lda] + tempA2 * B[k+7 + (i+1)*lda] + tempA3 * B[k+7 + (i+2)*lda] + tempA4 * B[k+7 + (i+3)*lda] + -// // tempA5 * B[k+7 + (i+4)*lda] + tempA6 * B[k+7 + (i+5)*lda] + tempA7 * B[k+7 + (i+6)*lda] + tempA8 * B[k+7 + (i+7)*lda]; -// -// -// } -// } -// } -//} - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. 
- - static __thread int i, j, k; - static __thread data_t tempA0, tempA1, tempA2, tempA3, tempA4, tempA5, tempA6, tempA7; - static __thread data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7; //tempC8, tempC9, tempC10, tempC11, tempC12, tempC13, tempC14, tempC15; - - static __thread int start, end, jStride, jToRow, jToCol, iToRow; - - start = coreid << 9; - end = (coreid+1) << 9; - jStride = 8; - - for (j=start; j < end; j+=jStride) { - jToRow = (j>>5)<<5; - jToCol = j%32; - tempC0 = 0; - tempC1 = 0; - tempC2 = 0; - tempC3 = 0; - tempC4 = 0; - tempC5 = 0; - tempC6 = 0; - tempC7 = 0; - //tempC8 = 0; - //tempC9 = 0; - //tempC10 = 0; - //tempC11 = 0; - //tempC12 = 0; - //tempC13 = 0; - //tempC14 = 0; - //tempC15 = 0; - - for ( i=0; i < lda; i+=2 ) { - iToRow = i << 5; - - tempA0 = A[i + jToRow]; - tempA1 = A[i+1 + jToRow]; - //tempA2 = A[i+2 + jToRow]; - //tempA3 = A[i+3 + jToRow]; - //tempA4 = A[i+4 + jToRow]; - //tempA5 = A[i+5 + jToRow]; - //tempA6 = A[i+6 + jToRow]; - //tempA7 = A[i+7 + jToRow]; - - tempC0 += tempA0 * B[(jToCol ) + (iToRow)]; - tempC1 += tempA0 * B[(jToCol+1 ) + (iToRow)]; - tempC2 += tempA0 * B[(jToCol+2 ) + (iToRow)]; - tempC3 += tempA0 * B[(jToCol+3 ) + (iToRow)]; - tempC4 += tempA0 * B[(jToCol+4 ) + (iToRow)]; - tempC5 += tempA0 * B[(jToCol+5 ) + (iToRow)]; - tempC6 += tempA0 * B[(jToCol+6 ) + (iToRow)]; - tempC7 += tempA0 * B[(jToCol+7 ) + (iToRow)]; - //tempC8 += tempA0 * B[(jToCol+8 ) + (iToRow)]; - //tempC9 += tempA0 * B[(jToCol+9 ) + (iToRow)]; - //tempC10 += tempA0 * B[(jToCol+10) + (iToRow)]; - //tempC11 += tempA0 * B[(jToCol+11) + (iToRow)]; - //tempC12 += tempA0 * B[(jToCol+12) + (iToRow)]; - //tempC13 += tempA0 * B[(jToCol+13) + (iToRow)]; - //tempC14 += tempA0 * B[(jToCol+14) + (iToRow)]; - //tempC15 += tempA0 * B[(jToCol+15) + (iToRow)]; - - iToRow += 32; - tempC0 += tempA1 * B[(jToCol ) + (iToRow)]; - tempC1 += tempA1 * B[(jToCol+1 ) + (iToRow)]; - tempC2 += tempA1 * B[(jToCol+2 ) + (iToRow)]; - tempC3 += tempA1 
* B[(jToCol+3 ) + (iToRow)]; - tempC4 += tempA1 * B[(jToCol+4 ) + (iToRow)]; - tempC5 += tempA1 * B[(jToCol+5 ) + (iToRow)]; - tempC6 += tempA1 * B[(jToCol+6 ) + (iToRow)]; - tempC7 += tempA1 * B[(jToCol+7 ) + (iToRow)]; - //tempC8 += tempA1 * B[(jToCol+8 ) + (iToRow+32)]; - //tempC9 += tempA1 * B[(jToCol+9 ) + (iToRow+32)]; - //tempC10 += tempA1 * B[(jToCol+10) + (iToRow+32)]; - //tempC11 += tempA1 * B[(jToCol+11) + (iToRow+32)]; - //tempC12 += tempA1 * B[(jToCol+12) + (iToRow+32)]; - //tempC13 += tempA1 * B[(jToCol+13) + (iToRow+32)]; - //tempC14 += tempA1 * B[(jToCol+14) + (iToRow+32)]; - //tempC15 += tempA1 * B[(jToCol+15) + (iToRow+32)]; - - //iToRow += 32; - //tempC0 += tempA2 * B[(jToCol ) + (iToRow)]; - //tempC1 += tempA2 * B[(jToCol+1 ) + (iToRow)]; - //tempC2 += tempA2 * B[(jToCol+2 ) + (iToRow)]; - //tempC3 += tempA2 * B[(jToCol+3 ) + (iToRow)]; - //tempC4 += tempA2 * B[(jToCol+4 ) + (iToRow)]; - //tempC5 += tempA2 * B[(jToCol+5 ) + (iToRow)]; - //tempC6 += tempA2 * B[(jToCol+6 ) + (iToRow)]; - //tempC7 += tempA2 * B[(jToCol+7 ) + (iToRow)]; - //tempC8 += tempA2 * B[(jToCol+8 ) + (iToRow)]; - //tempC9 += tempA2 * B[(jToCol+9 ) + (iToRow)]; - //tempC10 += tempA2 * B[(jToCol+10) + (iToRow)]; - //tempC11 += tempA2 * B[(jToCol+11) + (iToRow)]; - //tempC12 += tempA2 * B[(jToCol+12) + (iToRow)]; - //tempC13 += tempA2 * B[(jToCol+13) + (iToRow)]; - //tempC14 += tempA2 * B[(jToCol+14) + (iToRow)]; - //tempC15 += tempA2 * B[(jToCol+15) + (iToRow)]; - - //iToRow += 32; - //tempC0 += tempA3 * B[(jToCol ) + (iToRow)]; - //tempC1 += tempA3 * B[(jToCol+1 ) + (iToRow)]; - //tempC2 += tempA3 * B[(jToCol+2 ) + (iToRow)]; - //tempC3 += tempA3 * B[(jToCol+3 ) + (iToRow)]; - //tempC4 += tempA3 * B[(jToCol+4 ) + (iToRow)]; - //tempC5 += tempA3 * B[(jToCol+5 ) + (iToRow)]; - //tempC6 += tempA3 * B[(jToCol+6 ) + (iToRow)]; - //tempC7 += tempA3 * B[(jToCol+7 ) + (iToRow)]; - //tempC8 += tempA3 * B[(jToCol+8 ) + (iToRow)]; - //tempC9 += tempA3 * B[(jToCol+9 ) + (iToRow)]; - 
//tempC10 += tempA3 * B[(jToCol+10) + (iToRow)]; - //tempC11 += tempA3 * B[(jToCol+11) + (iToRow)]; - //tempC12 += tempA3 * B[(jToCol+12) + (iToRow)]; - //tempC13 += tempA3 * B[(jToCol+13) + (iToRow)]; - //tempC14 += tempA3 * B[(jToCol+14) + (iToRow)]; - //tempC15 += tempA3 * B[(jToCol+15) + (iToRow)]; - - //iToRow += 32; - //tempC0 += tempA4 * B[(jToCol ) + (iToRow)]; - //tempC1 += tempA4 * B[(jToCol+1 ) + (iToRow)]; - //tempC2 += tempA4 * B[(jToCol+2 ) + (iToRow)]; - //tempC3 += tempA4 * B[(jToCol+3 ) + (iToRow)]; - //tempC4 += tempA4 * B[(jToCol+4 ) + (iToRow)]; - //tempC5 += tempA4 * B[(jToCol+5 ) + (iToRow)]; - //tempC6 += tempA4 * B[(jToCol+6 ) + (iToRow)]; - //tempC7 += tempA4 * B[(jToCol+7 ) + (iToRow)]; - // - //iToRow += 32; - //tempC0 += tempA5 * B[(jToCol ) + (iToRow)]; - //tempC1 += tempA5 * B[(jToCol+1 ) + (iToRow)]; - //tempC2 += tempA5 * B[(jToCol+2 ) + (iToRow)]; - //tempC3 += tempA5 * B[(jToCol+3 ) + (iToRow)]; - //tempC4 += tempA5 * B[(jToCol+4 ) + (iToRow)]; - //tempC5 += tempA5 * B[(jToCol+5 ) + (iToRow)]; - //tempC6 += tempA5 * B[(jToCol+6 ) + (iToRow)]; - //tempC7 += tempA5 * B[(jToCol+7 ) + (iToRow)]; - // - //iToRow += 32; - //tempC0 += tempA6 * B[(jToCol ) + (iToRow)]; - //tempC1 += tempA6 * B[(jToCol+1 ) + (iToRow)]; - //tempC2 += tempA6 * B[(jToCol+2 ) + (iToRow)]; - //tempC3 += tempA6 * B[(jToCol+3 ) + (iToRow)]; - //tempC4 += tempA6 * B[(jToCol+4 ) + (iToRow)]; - //tempC5 += tempA6 * B[(jToCol+5 ) + (iToRow)]; - //tempC6 += tempA6 * B[(jToCol+6 ) + (iToRow)]; - //tempC7 += tempA6 * B[(jToCol+7 ) + (iToRow)]; - // - //iToRow += 32; - //tempC0 += tempA7 * B[(jToCol ) + (iToRow)]; - //tempC1 += tempA7 * B[(jToCol+1 ) + (iToRow)]; - //tempC2 += tempA7 * B[(jToCol+2 ) + (iToRow)]; - //tempC3 += tempA7 * B[(jToCol+3 ) + (iToRow)]; - //tempC4 += tempA7 * B[(jToCol+4 ) + (iToRow)]; - //tempC5 += tempA7 * B[(jToCol+5 ) + (iToRow)]; - //tempC6 += tempA7 * B[(jToCol+6 ) + (iToRow)]; - //tempC7 += tempA7 * B[(jToCol+7 ) + (iToRow)]; - - } - C[j ] 
= tempC0; - C[j + 1 ] = tempC1; - C[j + 2 ] = tempC2; - C[j + 3 ] = tempC3; - C[j + 4 ] = tempC4; - C[j + 5 ] = tempC5; - C[j + 6 ] = tempC6; - C[j + 7 ] = tempC7; - //C[j + 8 ] = tempC8 ; - //C[j + 9 ] = tempC9 ; - //C[j + 10] = tempC10; - //C[j + 11] = tempC11; - //C[j + 12] = tempC12; - //C[j + 13] = tempC13; - //C[j + 14] = tempC14; - //C[j + 15] = tempC15; - } -} - - - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - - //// Execute the provided, naive matmul - //barrier(nc); - //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - // - //// verify - //verifyMT(ARRAY_SIZE, results_data, verify_data); - // - //// clear results from the first trial - //size_t i; - //if (coreid == 0) - // for (i=0; i < ARRAY_SIZE; i++) - // results_data[i] = 0; - //barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/az_matmul/dataset.h b/mt/az_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/az_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 
0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 
1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 
0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 
3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 
74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 
85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/az_matmul/matmul_gendata.pl b/mt/az_matmul/matmul_gendata.pl deleted file mode 100755 index f21bb46..0000000 --- a/mt/az_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. 
-# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - 
-#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. ($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." 
\n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/az_matmul/matmul_mi.c b/mt/az_matmul/matmul_mi.c deleted file mode 100755 index 5bfd15f..0000000 --- a/mt/az_matmul/matmul_mi.c +++ /dev/null @@ -1,416 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if 
(coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - -data_t ffmul(data_t a, data_t b) { - data_t result = 0; - - for (int i=0; i < b; i++) { - result += a; - } - - return result; -} - - -//void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -//{ -// -// // ***************************** // -// // **** ADD YOUR CODE HERE ***** // -// // ***************************** // -// // -// // feel free to make a separate function for MI and MSI versions. 
-// -// static __thread int i, j, k; -// static __thread int jlda, ilda; -// static __thread data_t tempA1, tempA2, tempA3, tempA4, tempA5, tempA6, tempA7, tempA8; -// static __thread int start, end; -// -// start = coreid*(lda>>1); -// end = (coreid+1)*(lda>>1); -// -// for (j=start; j < end; j+=1) { -// jlda = j * lda; -// for ( i=0; i < lda; i+=1 ) { -// ilda = i*lda; -// tempA1 = A[i + jlda]; -// //tempA2 = A[i+1 + jlda]; -// //tempA3 = A[i+2 + jlda]; -// //tempA4 = A[i+3 + jlda]; -// //tempA5 = A[i+4 + jlda]; -// //tempA6 = A[i+5 + jlda]; -// //tempA7 = A[i+6 + jlda]; -// //tempA8 = A[i+7 + jlda]; -// //tempC1 = C[i + j*lda]; -// //tempC2 = C[i+1 + j*lda]; -// for(k=0; k < lda; k+=1) { -// //C[k + jlda] += tempA1 * B[k + i*lda] + tempA2 * B[k + (i+1)*lda] + tempA3 * B[k + (i+2)*lda] + tempA4 * B[k + (i+3)*lda] + -// // tempA5 * B[k + (i+4)*lda] + tempA6 * B[k + (i+5)*lda] + tempA7 * B[k + (i+6)*lda] + tempA8 * B[k + (i+7)*lda]; -// -// C[k + jlda] += tempA1* B[k + i*lda];// + ffmul(tempA2,B[k + (i+1)*lda]) + tempA3 * B[k + (i+2)*lda] + tempA4 * B[k + (i+3)*lda] + -// // tempA5 * B[k + (i+4)*lda] + tempA6 * B[k + (i+5)*lda] + tempA7 * B[k + (i+6)*lda] + tempA8 * B[k + (i+7)*lda]; -// // -// //C[k+1 + jlda] += tempA1 * B[k+1 + i*lda] + tempA2 * B[k+1 + (i+1)*lda] + tempA3 * B[k+1 + (i+2)*lda] + tempA4 * B[k+1 + (i+3)*lda] + -// // tempA5 * B[k+1 + (i+4)*lda] + tempA6 * B[k+1 + (i+5)*lda] + tempA7 * B[k+1 + (i+6)*lda] + tempA8 * B[k+1 + (i+7)*lda]; -// // -// //C[k+2 + jlda] += tempA1 * B[k+2 + i*lda] + tempA2 * B[k+2 + (i+1)*lda] + tempA3 * B[k+2 + (i+2)*lda] + tempA4 * B[k+2 + (i+3)*lda] + -// // tempA5 * B[k+2 + (i+4)*lda] + tempA6 * B[k+2 + (i+5)*lda] + tempA7 * B[k+2 + (i+6)*lda] + tempA8 * B[k+2 + (i+7)*lda]; -// // -// //C[k+3 + jlda] += tempA1 * B[k+3 + i*lda] + tempA2 * B[k+3 + (i+1)*lda] + tempA3 * B[k+3 + (i+2)*lda] + tempA4 * B[k+3 + (i+3)*lda] + -// // tempA5 * B[k+3 + (i+4)*lda] + tempA6 * B[k+3 + (i+5)*lda] + tempA7 * B[k+3 + (i+6)*lda] + tempA8 * 
B[k+3 + (i+7)*lda]; -// // -// //C[k+4 + jlda] += tempA1 * B[k+4 + i*lda] + tempA2 * B[k+4 + (i+1)*lda] + tempA3 * B[k+4 + (i+2)*lda] + tempA4 * B[k+4 + (i+3)*lda] + -// // tempA5 * B[k+4 + (i+4)*lda] + tempA6 * B[k+4 + (i+5)*lda] + tempA7 * B[k+4 + (i+6)*lda] + tempA8 * B[k+4 + (i+7)*lda]; -// // -// //C[k+5 + jlda] += tempA1 * B[k+5 + i*lda] + tempA2 * B[k+5 + (i+1)*lda] + tempA3 * B[k+5 + (i+2)*lda] + tempA4 * B[k+5 + (i+3)*lda] + -// // tempA5 * B[k+5 + (i+4)*lda] + tempA6 * B[k+5 + (i+5)*lda] + tempA7 * B[k+5 + (i+6)*lda] + tempA8 * B[k+5 + (i+7)*lda]; -// // -// //C[k+6 + jlda] += tempA1 * B[k+6 + i*lda] + tempA2 * B[k+6 + (i+1)*lda] + tempA3 * B[k+6 + (i+2)*lda] + tempA4 * B[k+6 + (i+3)*lda] + -// // tempA5 * B[k+6 + (i+4)*lda] + tempA6 * B[k+6 + (i+5)*lda] + tempA7 * B[k+6 + (i+6)*lda] + tempA8 * B[k+6 + (i+7)*lda]; -// // -// //C[k+7 + jlda] += tempA1 * B[k+7 + i*lda] + tempA2 * B[k+7 + (i+1)*lda] + tempA3 * B[k+7 + (i+2)*lda] + tempA4 * B[k+7 + (i+3)*lda] + -// // tempA5 * B[k+7 + (i+4)*lda] + tempA6 * B[k+7 + (i+5)*lda] + tempA7 * B[k+7 + (i+6)*lda] + tempA8 * B[k+7 + (i+7)*lda]; -// -// -// } -// } -// } -//} - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. 
- - static __thread int i, j, k; - static __thread data_t tempA0, tempA1, tempA2, tempA3, tempA4, tempA5, tempA6, tempA7; - static __thread data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7; //tempC8, tempC9, tempC10, tempC11, tempC12, tempC13, tempC14, tempC15; - - static __thread int start, end, jStride, jToRow, jToCol, iToRow; - - start = coreid << 9; - end = (coreid+1) << 9; - jStride = 8; - - for (j=start; j < end; j+=jStride) { - jToRow = (j>>5)<<5; - jToCol = j%32; - tempC0 = 0; - tempC1 = 0; - tempC2 = 0; - tempC3 = 0; - tempC4 = 0; - tempC5 = 0; - tempC6 = 0; - tempC7 = 0; - //tempC8 = 0; - //tempC9 = 0; - //tempC10 = 0; - //tempC11 = 0; - //tempC12 = 0; - //tempC13 = 0; - //tempC14 = 0; - //tempC15 = 0; - - for ( i=0; i < lda; i+=2 ) { - iToRow = i << 5; - - tempA0 = A[i + jToRow]; - tempA1 = A[i+1 + jToRow]; - //tempA2 = A[i+2 + jToRow]; - //tempA3 = A[i+3 + jToRow]; - //tempA4 = A[i+4 + jToRow]; - //tempA5 = A[i+5 + jToRow]; - //tempA6 = A[i+6 + jToRow]; - //tempA7 = A[i+7 + jToRow]; - - tempC0 += tempA0 * B[(jToCol ) + (iToRow)]; - tempC1 += tempA0 * B[(jToCol+1 ) + (iToRow)]; - tempC2 += tempA0 * B[(jToCol+2 ) + (iToRow)]; - tempC3 += tempA0 * B[(jToCol+3 ) + (iToRow)]; - tempC4 += tempA0 * B[(jToCol+4 ) + (iToRow)]; - tempC5 += tempA0 * B[(jToCol+5 ) + (iToRow)]; - tempC6 += tempA0 * B[(jToCol+6 ) + (iToRow)]; - tempC7 += tempA0 * B[(jToCol+7 ) + (iToRow)]; - //tempC8 += tempA0 * B[(jToCol+8 ) + (iToRow)]; - //tempC9 += tempA0 * B[(jToCol+9 ) + (iToRow)]; - //tempC10 += tempA0 * B[(jToCol+10) + (iToRow)]; - //tempC11 += tempA0 * B[(jToCol+11) + (iToRow)]; - //tempC12 += tempA0 * B[(jToCol+12) + (iToRow)]; - //tempC13 += tempA0 * B[(jToCol+13) + (iToRow)]; - //tempC14 += tempA0 * B[(jToCol+14) + (iToRow)]; - //tempC15 += tempA0 * B[(jToCol+15) + (iToRow)]; - - iToRow += 32; - tempC0 += tempA1 * B[(jToCol ) + (iToRow)]; - tempC1 += tempA1 * B[(jToCol+1 ) + (iToRow)]; - tempC2 += tempA1 * B[(jToCol+2 ) + (iToRow)]; - tempC3 += tempA1 
* B[(jToCol+3 ) + (iToRow)]; - tempC4 += tempA1 * B[(jToCol+4 ) + (iToRow)]; - tempC5 += tempA1 * B[(jToCol+5 ) + (iToRow)]; - tempC6 += tempA1 * B[(jToCol+6 ) + (iToRow)]; - tempC7 += tempA1 * B[(jToCol+7 ) + (iToRow)]; - //tempC8 += tempA1 * B[(jToCol+8 ) + (iToRow+32)]; - //tempC9 += tempA1 * B[(jToCol+9 ) + (iToRow+32)]; - //tempC10 += tempA1 * B[(jToCol+10) + (iToRow+32)]; - //tempC11 += tempA1 * B[(jToCol+11) + (iToRow+32)]; - //tempC12 += tempA1 * B[(jToCol+12) + (iToRow+32)]; - //tempC13 += tempA1 * B[(jToCol+13) + (iToRow+32)]; - //tempC14 += tempA1 * B[(jToCol+14) + (iToRow+32)]; - //tempC15 += tempA1 * B[(jToCol+15) + (iToRow+32)]; - - //iToRow += 32; - //tempC0 += tempA2 * B[(jToCol ) + (iToRow)]; - //tempC1 += tempA2 * B[(jToCol+1 ) + (iToRow)]; - //tempC2 += tempA2 * B[(jToCol+2 ) + (iToRow)]; - //tempC3 += tempA2 * B[(jToCol+3 ) + (iToRow)]; - //tempC4 += tempA2 * B[(jToCol+4 ) + (iToRow)]; - //tempC5 += tempA2 * B[(jToCol+5 ) + (iToRow)]; - //tempC6 += tempA2 * B[(jToCol+6 ) + (iToRow)]; - //tempC7 += tempA2 * B[(jToCol+7 ) + (iToRow)]; - //tempC8 += tempA2 * B[(jToCol+8 ) + (iToRow)]; - //tempC9 += tempA2 * B[(jToCol+9 ) + (iToRow)]; - //tempC10 += tempA2 * B[(jToCol+10) + (iToRow)]; - //tempC11 += tempA2 * B[(jToCol+11) + (iToRow)]; - //tempC12 += tempA2 * B[(jToCol+12) + (iToRow)]; - //tempC13 += tempA2 * B[(jToCol+13) + (iToRow)]; - //tempC14 += tempA2 * B[(jToCol+14) + (iToRow)]; - //tempC15 += tempA2 * B[(jToCol+15) + (iToRow)]; - - //iToRow += 32; - //tempC0 += tempA3 * B[(jToCol ) + (iToRow)]; - //tempC1 += tempA3 * B[(jToCol+1 ) + (iToRow)]; - //tempC2 += tempA3 * B[(jToCol+2 ) + (iToRow)]; - //tempC3 += tempA3 * B[(jToCol+3 ) + (iToRow)]; - //tempC4 += tempA3 * B[(jToCol+4 ) + (iToRow)]; - //tempC5 += tempA3 * B[(jToCol+5 ) + (iToRow)]; - //tempC6 += tempA3 * B[(jToCol+6 ) + (iToRow)]; - //tempC7 += tempA3 * B[(jToCol+7 ) + (iToRow)]; - //tempC8 += tempA3 * B[(jToCol+8 ) + (iToRow)]; - //tempC9 += tempA3 * B[(jToCol+9 ) + (iToRow)]; - 
//tempC10 += tempA3 * B[(jToCol+10) + (iToRow)]; - //tempC11 += tempA3 * B[(jToCol+11) + (iToRow)]; - //tempC12 += tempA3 * B[(jToCol+12) + (iToRow)]; - //tempC13 += tempA3 * B[(jToCol+13) + (iToRow)]; - //tempC14 += tempA3 * B[(jToCol+14) + (iToRow)]; - //tempC15 += tempA3 * B[(jToCol+15) + (iToRow)]; - - //iToRow += 32; - //tempC0 += tempA4 * B[(jToCol ) + (iToRow)]; - //tempC1 += tempA4 * B[(jToCol+1 ) + (iToRow)]; - //tempC2 += tempA4 * B[(jToCol+2 ) + (iToRow)]; - //tempC3 += tempA4 * B[(jToCol+3 ) + (iToRow)]; - //tempC4 += tempA4 * B[(jToCol+4 ) + (iToRow)]; - //tempC5 += tempA4 * B[(jToCol+5 ) + (iToRow)]; - //tempC6 += tempA4 * B[(jToCol+6 ) + (iToRow)]; - //tempC7 += tempA4 * B[(jToCol+7 ) + (iToRow)]; - // - //iToRow += 32; - //tempC0 += tempA5 * B[(jToCol ) + (iToRow)]; - //tempC1 += tempA5 * B[(jToCol+1 ) + (iToRow)]; - //tempC2 += tempA5 * B[(jToCol+2 ) + (iToRow)]; - //tempC3 += tempA5 * B[(jToCol+3 ) + (iToRow)]; - //tempC4 += tempA5 * B[(jToCol+4 ) + (iToRow)]; - //tempC5 += tempA5 * B[(jToCol+5 ) + (iToRow)]; - //tempC6 += tempA5 * B[(jToCol+6 ) + (iToRow)]; - //tempC7 += tempA5 * B[(jToCol+7 ) + (iToRow)]; - // - //iToRow += 32; - //tempC0 += tempA6 * B[(jToCol ) + (iToRow)]; - //tempC1 += tempA6 * B[(jToCol+1 ) + (iToRow)]; - //tempC2 += tempA6 * B[(jToCol+2 ) + (iToRow)]; - //tempC3 += tempA6 * B[(jToCol+3 ) + (iToRow)]; - //tempC4 += tempA6 * B[(jToCol+4 ) + (iToRow)]; - //tempC5 += tempA6 * B[(jToCol+5 ) + (iToRow)]; - //tempC6 += tempA6 * B[(jToCol+6 ) + (iToRow)]; - //tempC7 += tempA6 * B[(jToCol+7 ) + (iToRow)]; - // - //iToRow += 32; - //tempC0 += tempA7 * B[(jToCol ) + (iToRow)]; - //tempC1 += tempA7 * B[(jToCol+1 ) + (iToRow)]; - //tempC2 += tempA7 * B[(jToCol+2 ) + (iToRow)]; - //tempC3 += tempA7 * B[(jToCol+3 ) + (iToRow)]; - //tempC4 += tempA7 * B[(jToCol+4 ) + (iToRow)]; - //tempC5 += tempA7 * B[(jToCol+5 ) + (iToRow)]; - //tempC6 += tempA7 * B[(jToCol+6 ) + (iToRow)]; - //tempC7 += tempA7 * B[(jToCol+7 ) + (iToRow)]; - - } - C[j ] 
= tempC0; - C[j + 1 ] = tempC1; - C[j + 2 ] = tempC2; - C[j + 3 ] = tempC3; - C[j + 4 ] = tempC4; - C[j + 5 ] = tempC5; - C[j + 6 ] = tempC6; - C[j + 7 ] = tempC7; - //C[j + 8 ] = tempC8 ; - //C[j + 9 ] = tempC9 ; - //C[j + 10] = tempC10; - //C[j + 11] = tempC11; - //C[j + 12] = tempC12; - //C[j + 13] = tempC13; - //C[j + 14] = tempC14; - //C[j + 15] = tempC15; - } -} - - - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - - //// Execute the provided, naive matmul - //barrier(nc); - //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - // - //// verify - //verifyMT(ARRAY_SIZE, results_data, verify_data); - // - //// clear results from the first trial - //size_t i; - //if (coreid == 0) - // for (i=0; i < ARRAY_SIZE; i++) - // results_data[i] = 0; - //barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/az_vvadd/az_vvadd.c b/mt/az_vvadd/az_vvadd.c deleted file mode 100755 index cf32ac7..0000000 --- a/mt/az_vvadd/az_vvadd.c +++ /dev/null @@ -1,174 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This 
benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* 
__restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - - size_t i; - size_t split = n / ncores; - - //interleave accesses - for (i = coreid * split; i < (coreid+1)*split-1 && i < n-1; i+=2) { - x[i] = x[i] + y[i]; - x[i+1] = x[i+1] + y[i+1]; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/az_vvadd/dataset.h 
b/mt/az_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/az_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 
14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 
8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 
8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 
11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 
1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 
6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 
11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 
11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 
12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 
14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/az_vvadd/vvadd_gendata.pl b/mt/az_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/az_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my 
$arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." 
\n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/ba_matmul/ba_matmul.c b/mt/ba_matmul/ba_matmul.c deleted file mode 100755 index 3f712c1..0000000 --- a/mt/ba_matmul/ba_matmul.c +++ /dev/null @@ -1,271 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i 
= 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ -/* - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } -*/ -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - size_t c_start = lda / ncores * coreid; - size_t c_row; - size_t c_col; - size_t colSplit = 0; - size_t i; - size_t useSplit = 0; - data_t a1; - data_t a2; - data_t a3; - data_t a4; - data_t a5; - data_t a6; - data_t a7; - data_t a8; - data_t c1; - data_t c2; - data_t c3; - data_t c4; - data_t c5; - data_t c6; - data_t c7; - data_t c8; - size_t block; - for (block = 0; block < 2; block++) { - for (colSplit = 0; colSplit < 4; colSplit++) { - useSplit = (coreid == 0) ? 
colSplit : (colSplit + 2 ) % 4; - for (c_row = c_start + block * 8; c_row < c_start + block * 8 + 8; c_row += 2) { - for (c_col = 0; c_col < lda; c_col+=4) { - c1 = C[c_row*lda+c_col]; - c2 = C[(c_row+1)*lda+c_col]; - c3 = C[c_row*lda+c_col+1]; - c4 = C[(c_row+1)*lda+c_col+1]; - c5 = C[c_row*lda+c_col+2]; - c6 = C[(c_row+1)*lda+c_col+2]; - c7 = C[c_row*lda+c_col+3]; - c8 = C[(c_row+1)*lda+c_col+3]; - for (i = useSplit * lda / 4; i < (useSplit + 1) * lda / 4; i+=4) { - a1 = A[c_row*lda+i]; - a2 = A[(c_row+1)*lda+i]; - a3 = A[c_row*lda+i+1]; - a4 = A[(c_row+1)*lda+i+1]; - a5 = A[c_row*lda+i+2]; - a6 = A[(c_row+1)*lda+i+2]; - a7 = A[c_row*lda+i+3]; - a8 = A[(c_row+1)*lda+i+3]; - - c1 += a1 * B[i*lda+c_col]; - c2 += a2 * B[i*lda+c_col]; - - c1 += a3 * B[(i+1)*lda+c_col]; - c2 += a4 * B[(i+1)*lda+c_col]; - - c1 += a5 * B[(i+2)*lda+c_col]; - c2 += a6 * B[(i+2)*lda+c_col]; - - c1 += a7 * B[(i+3)*lda+c_col]; - c2 += a8 * B[(i+3)*lda+c_col]; - - c3 += a1 * B[i*lda+c_col+1]; - c4 += a2 * B[i*lda+c_col+1]; - - c3 += a3 * B[(i+1)*lda+c_col+1]; - c4 += a4 * B[(i+1)*lda+c_col+1]; - - c3 += a5 * B[(i+2)*lda+c_col+1]; - c4 += a6 * B[(i+2)*lda+c_col+1]; - - c3 += a7 * B[(i+3)*lda+c_col+1]; - c4 += a8 * B[(i+3)*lda+c_col+1]; - - c5 += a1 * B[i*lda+c_col+2]; - c6 += a2 * B[i*lda+c_col+2]; - - c5 += a3 * B[(i+1)*lda+c_col+2]; - c6 += a4 * B[(i+1)*lda+c_col+2]; - - c5 += a5 * B[(i+2)*lda+c_col+2]; - c6 += a6 * B[(i+2)*lda+c_col+2]; - - c5 += a7 * B[(i+3)*lda+c_col+2]; - c6 += a8 * B[(i+3)*lda+c_col+2]; - - c7 += a1 * B[i*lda+c_col+3]; - c8 += a2 * B[i*lda+c_col+3]; - - c7 += a3 * B[(i+1)*lda+c_col+3]; - c8 += a4 * B[(i+1)*lda+c_col+3]; - - c7 += a5 * B[(i+2)*lda+c_col+3]; - c8 += a6 * B[(i+2)*lda+c_col+3]; - - c7 += a7 * B[(i+3)*lda+c_col+3]; - c8 += a8 * B[(i+3)*lda+c_col+3]; - } - - C[c_row*lda+c_col] = c1; - C[(c_row+1)*lda+c_col] = c2; - - C[c_row*lda+c_col+1] = c3; - C[(c_row+1)*lda+c_col+1] = c4; - - C[c_row*lda+c_col+2] = c5; - C[(c_row+1)*lda+c_col+2] = c6; - - 
C[c_row*lda+c_col+3] = c7; - C[(c_row+1)*lda+c_col+3] = c8; - } - } - } - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - - // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - - // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); - - // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ba_matmul/dataset.h b/mt/ba_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/ba_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 
2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 
0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 
2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 
80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 
90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 
75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/ba_matmul/matmul_gendata.pl b/mt/ba_matmul/matmul_gendata.pl deleted file mode 100755 index f21bb46..0000000 --- a/mt/ba_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - 
print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/ba_matmul/matmul_mi.c b/mt/ba_matmul/matmul_mi.c deleted file mode 100755 index 3f712c1..0000000 --- a/mt/ba_matmul/matmul_mi.c +++ /dev/null @@ -1,271 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for 
( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ -/* - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } -*/ -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - size_t c_start = lda / ncores * coreid; - size_t c_row; - size_t c_col; - size_t colSplit = 0; - size_t i; - size_t useSplit = 0; - data_t a1; - data_t a2; - data_t a3; - data_t a4; - data_t a5; - data_t a6; - data_t a7; - data_t a8; - data_t c1; - data_t c2; - data_t c3; - data_t c4; - data_t c5; - data_t c6; - data_t c7; - data_t c8; - size_t block; - for (block = 0; block < 2; block++) { - for (colSplit = 0; colSplit < 4; colSplit++) { - useSplit = (coreid == 0) ? 
colSplit : (colSplit + 2 ) % 4; - for (c_row = c_start + block * 8; c_row < c_start + block * 8 + 8; c_row += 2) { - for (c_col = 0; c_col < lda; c_col+=4) { - c1 = C[c_row*lda+c_col]; - c2 = C[(c_row+1)*lda+c_col]; - c3 = C[c_row*lda+c_col+1]; - c4 = C[(c_row+1)*lda+c_col+1]; - c5 = C[c_row*lda+c_col+2]; - c6 = C[(c_row+1)*lda+c_col+2]; - c7 = C[c_row*lda+c_col+3]; - c8 = C[(c_row+1)*lda+c_col+3]; - for (i = useSplit * lda / 4; i < (useSplit + 1) * lda / 4; i+=4) { - a1 = A[c_row*lda+i]; - a2 = A[(c_row+1)*lda+i]; - a3 = A[c_row*lda+i+1]; - a4 = A[(c_row+1)*lda+i+1]; - a5 = A[c_row*lda+i+2]; - a6 = A[(c_row+1)*lda+i+2]; - a7 = A[c_row*lda+i+3]; - a8 = A[(c_row+1)*lda+i+3]; - - c1 += a1 * B[i*lda+c_col]; - c2 += a2 * B[i*lda+c_col]; - - c1 += a3 * B[(i+1)*lda+c_col]; - c2 += a4 * B[(i+1)*lda+c_col]; - - c1 += a5 * B[(i+2)*lda+c_col]; - c2 += a6 * B[(i+2)*lda+c_col]; - - c1 += a7 * B[(i+3)*lda+c_col]; - c2 += a8 * B[(i+3)*lda+c_col]; - - c3 += a1 * B[i*lda+c_col+1]; - c4 += a2 * B[i*lda+c_col+1]; - - c3 += a3 * B[(i+1)*lda+c_col+1]; - c4 += a4 * B[(i+1)*lda+c_col+1]; - - c3 += a5 * B[(i+2)*lda+c_col+1]; - c4 += a6 * B[(i+2)*lda+c_col+1]; - - c3 += a7 * B[(i+3)*lda+c_col+1]; - c4 += a8 * B[(i+3)*lda+c_col+1]; - - c5 += a1 * B[i*lda+c_col+2]; - c6 += a2 * B[i*lda+c_col+2]; - - c5 += a3 * B[(i+1)*lda+c_col+2]; - c6 += a4 * B[(i+1)*lda+c_col+2]; - - c5 += a5 * B[(i+2)*lda+c_col+2]; - c6 += a6 * B[(i+2)*lda+c_col+2]; - - c5 += a7 * B[(i+3)*lda+c_col+2]; - c6 += a8 * B[(i+3)*lda+c_col+2]; - - c7 += a1 * B[i*lda+c_col+3]; - c8 += a2 * B[i*lda+c_col+3]; - - c7 += a3 * B[(i+1)*lda+c_col+3]; - c8 += a4 * B[(i+1)*lda+c_col+3]; - - c7 += a5 * B[(i+2)*lda+c_col+3]; - c8 += a6 * B[(i+2)*lda+c_col+3]; - - c7 += a7 * B[(i+3)*lda+c_col+3]; - c8 += a8 * B[(i+3)*lda+c_col+3]; - } - - C[c_row*lda+c_col] = c1; - C[(c_row+1)*lda+c_col] = c2; - - C[c_row*lda+c_col+1] = c3; - C[(c_row+1)*lda+c_col+1] = c4; - - C[c_row*lda+c_col+2] = c5; - C[(c_row+1)*lda+c_col+2] = c6; - - 
C[c_row*lda+c_col+3] = c7; - C[(c_row+1)*lda+c_col+3] = c8; - } - } - } - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - - // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - - // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); - - // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ba_vvadd/ba_vvadd.c b/mt/ba_vvadd/ba_vvadd.c deleted file mode 100755 index 90aec9d..0000000 --- a/mt/ba_vvadd/ba_vvadd.c +++ /dev/null @@ -1,168 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - 
size_t start = n * coreid / ncores; - size_t end = (coreid == ncores - 1) ? n : n * (coreid+1)/ ncores; - for (i = start; i < end; i++) { - x[i] = x[i] + y[i]; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/ba_vvadd/dataset.h b/mt/ba_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/ba_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 
9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 
1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 
18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, 
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 
2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 
14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 
16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 
32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 
21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 
26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 
21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/ba_vvadd/vvadd_gendata.pl b/mt/ba_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/ba_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( 
my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." 
\n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/bb_matmul.c b/mt/bb_matmul.c new file mode 100755 index 0000000..c335ed3 --- /dev/null +++ b/mt/bb_matmul.c @@ -0,0 +1,35 @@ +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + int i, j, k; + + for (i = 0; i < lda; i += 2) { + for (j = coreid * (lda / ncores); j < (coreid + 1) * (lda / ncores); j += 4) { + register data_t c00 = 0, c01 = 0; + register data_t c10 = 0, c11 = 0; + register data_t c20 = 0, c21 = 0; + register data_t c30 = 0, c31 = 0; + + register data_t a0, a1, a2, a3, b0, b1; + for (k = 0; k < lda; k++) { + a0 = A[j*lda + k + 0*lda]; + a1 = A[j*lda + k + 1*lda]; + a2 = A[j*lda + k + 2*lda]; + a3 = A[j*lda + k + 3*lda]; + + b0 = B[k*lda + i + 0]; + b1 = B[k*lda + i + 1]; + + c00 += a0 * b0; c01 += a0 * b1; + c10 += a1 * b0; c11 += a1 * b1; + c20 += a2 * b0; c21 += a2 * b1; + c30 += a3 * b0; c31 += a3 * b1; + } + + C[i + j*lda + 0 + 0*lda] = c00; C[i + j*lda + 1 + 0*lda] = c01; + C[i + j*lda + 0 + 1*lda] = c10; C[i + j*lda + 1 + 1*lda] = c11; + C[i + j*lda + 0 + 2*lda] = c20; C[i + j*lda + 1 + 2*lda] = c21; + C[i + j*lda + 0 + 3*lda] = c30; C[i + j*lda + 1 + 3*lda] = c31; + } + } +} diff --git a/mt/bb_matmul/bb_matmul.c b/mt/bb_matmul/bb_matmul.c deleted file mode 100755 index d9b2add..0000000 --- a/mt/bb_matmul/bb_matmul.c +++ /dev/null @@ -1,273 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. 
The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, 
k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul_msi(const int lda, const data_t A[], const data_t B[], data_t C[] ) { - int i, j, k; - - for (i = 0; i < lda; i += 2) { - for (j = coreid * (lda / ncores); j < (coreid + 1) * (lda / ncores); j += 4) { - //for (j = 0; j < lda; j += 4) { - register data_t c00 = 0, c01 = 0; - register data_t c10 = 0, c11 = 0; - register data_t c20 = 0, c21 = 0; - register data_t c30 = 0, c31 = 0; - - register data_t a0, a1, a2, a3, b0, b1; - for (k = 0; k < lda; k++) { - a0 = A[j*lda + k + 0*lda]; - a1 = A[j*lda + k + 1*lda]; - a2 = A[j*lda + k + 2*lda]; - a3 = A[j*lda + k + 3*lda]; - - b0 = B[k*lda + i + 0]; - b1 = B[k*lda + i + 1]; - /*if (coreid == 0) { - printf("i = %d; j = %d; k = %d\n", i, j, k); - printf("%d += %d * %d; %d += %d * %d\n", (int)c00, (int)a0, (int)b0, (int)c01, (int)a0, (int)b1); - printf("%d += %d * %d; %d += %d * %d\n", (int)c10, (int)a1, (int)b0, (int)c11, (int)a1, (int)b1); - printf("%d += %d * %d; %d += %d * %d\n", (int)c20, (int)a2, (int)b0, (int)c21, (int)a2, (int)b1); - printf("%d += %d * %d; %d += %d * %d\n", (int)c30, (int)a3, (int)b0, (int)c31, (int)a3, (int)b1); - printf("\n"); - }*/ - - c00 += a0 * b0; c01 += a0 * b1; - c10 += a1 * b0; c11 += a1 * b1; - c20 += a2 * b0; c21 += a2 * b1; - c30 += a3 * b0; c31 += a3 * b1; - } - - C[i + j*lda + 0 + 0*lda] = c00; C[i + j*lda + 1 + 0*lda] = c01; - C[i + j*lda + 0 + 1*lda] = c10; C[i + j*lda + 1 + 1*lda] = c11; - C[i + j*lda + 0 + 2*lda] = c20; C[i + j*lda + 1 + 2*lda] = c21; - C[i + j*lda + 0 + 3*lda] = c30; C[i + j*lda + 1 + 3*lda] = c31; - } - } -} - -void __attribute__((noinline)) matmul_mi(const int lda, const data_t A[], const data_t B[], data_t C[] ) { - int i, j, k; - - int curhalf = coreid; - for (i = 0; i < lda; i += 2) { - for (j = coreid * (lda / ncores); j < 
(coreid + 1) * (lda / ncores); j += 4) { - register float c00 = 0, c01 = 0; - register float c10 = 0, c11 = 0; - register float c20 = 0, c21 = 0; - register float c30 = 0, c31 = 0; - - register float a0, a1, a2, a3, b0, b1; - for (k = curhalf * (lda/2); k < curhalf * (lda/2) + (lda/2); k++) { - a0 = A[j*lda + k + 0*lda]; - a1 = A[j*lda + k + 1*lda]; - a2 = A[j*lda + k + 2*lda]; - a3 = A[j*lda + k + 3*lda]; - - b0 = B[k*lda + i + 0]; - b1 = B[k*lda + i + 1]; - - c00 += a0 * b0; c01 += a0 * b1; - c10 += a1 * b0; c11 += a1 * b1; - c20 += a2 * b0; c21 += a2 * b1; - c30 += a3 * b0; c31 += a3 * b1; - } - - C[i + j*lda + 0 + 0*lda] += c00; C[i + j*lda + 1 + 0*lda] += c01; - C[i + j*lda + 0 + 1*lda] += c10; C[i + j*lda + 1 + 1*lda] += c11; - C[i + j*lda + 0 + 2*lda] += c20; C[i + j*lda + 1 + 2*lda] += c21; - C[i + j*lda + 0 + 3*lda] += c30; C[i + j*lda + 1 + 3*lda] += c31; - } - } - - barrier(ncores); - curhalf++; - curhalf %= ncores; - - for (i = 0; i < lda; i += 2) { - for (j = coreid * (lda / ncores); j < (coreid + 1) * (lda / ncores); j += 4) { - register float c00 = 0, c01 = 0; - register float c10 = 0, c11 = 0; - register float c20 = 0, c21 = 0; - register float c30 = 0, c31 = 0; - - register float a0, a1, a2, a3, b0, b1; - for (k = curhalf * (lda/2); k < curhalf * (lda/2) + (lda/2); k++) { - a0 = A[j*lda + k + 0*lda]; - a1 = A[j*lda + k + 1*lda]; - a2 = A[j*lda + k + 2*lda]; - a3 = A[j*lda + k + 3*lda]; - - b0 = B[k*lda + i + 0]; - b1 = B[k*lda + i + 1]; - - c00 += a0 * b0; c01 += a0 * b1; - c10 += a1 * b0; c11 += a1 * b1; - c20 += a2 * b0; c21 += a2 * b1; - c30 += a3 * b0; c31 += a3 * b1; - } - - C[i + j*lda + 0 + 0*lda] += c00; C[i + j*lda + 1 + 0*lda] += c01; - C[i + j*lda + 0 + 1*lda] += c10; C[i + j*lda + 1 + 1*lda] += c11; - C[i + j*lda + 0 + 2*lda] += c20; C[i + j*lda + 1 + 2*lda] += c21; - C[i + j*lda + 0 + 3*lda] += c30; C[i + j*lda + 1 + 3*lda] += c31; - } - } -} - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], 
data_t C[] ) -{ - matmul_msi(lda, A, B, C); -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bb_matmul/dataset.h b/mt/bb_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/bb_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 
0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 
0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 
2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 
65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 
67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 
99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/bb_matmul/matmul_gendata.pl b/mt/bb_matmul/matmul_gendata.pl deleted file mode 100755 index f21bb46..0000000 --- a/mt/bb_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print 
sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/bb_matmul/matmul_mi.c b/mt/bb_matmul/matmul_mi.c deleted file mode 100755 index 346f178..0000000 --- a/mt/bb_matmul/matmul_mi.c +++ /dev/null @@ -1,273 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for 
( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul_msi(const int lda, const data_t A[], const data_t B[], data_t C[] ) { - int i, j, k; - - for (i = 0; i < lda; i += 2) { - for (j = coreid * (lda / ncores); j < (coreid + 1) * (lda / ncores); j += 4) { - //for (j = 0; j < lda; j += 4) { - register data_t c00 = 0, c01 = 0; - register data_t c10 = 0, c11 = 0; - register data_t c20 = 0, c21 = 0; - register data_t c30 = 0, c31 = 0; - - register data_t a0, a1, a2, a3, b0, b1; - for (k = 0; k < lda; k++) { - a0 = A[j*lda + k + 0*lda]; - a1 = A[j*lda + k + 1*lda]; - a2 = A[j*lda + k + 2*lda]; - a3 = A[j*lda + k + 3*lda]; - - b0 = B[k*lda + i + 0]; - b1 = B[k*lda + i + 1]; - /*if (coreid == 0) { - printf("i = %d; j = %d; k = %d\n", i, j, k); - printf("%d += %d * %d; %d += %d * %d\n", (int)c00, (int)a0, (int)b0, (int)c01, (int)a0, (int)b1); - printf("%d += %d * %d; %d += %d * %d\n", (int)c10, (int)a1, (int)b0, (int)c11, (int)a1, (int)b1); - printf("%d += %d * %d; %d += %d * %d\n", (int)c20, (int)a2, (int)b0, (int)c21, (int)a2, (int)b1); - printf("%d += %d * %d; %d += %d * %d\n", (int)c30, (int)a3, (int)b0, (int)c31, (int)a3, 
(int)b1); - printf("\n"); - }*/ - - c00 += a0 * b0; c01 += a0 * b1; - c10 += a1 * b0; c11 += a1 * b1; - c20 += a2 * b0; c21 += a2 * b1; - c30 += a3 * b0; c31 += a3 * b1; - } - - C[i + j*lda + 0 + 0*lda] = c00; C[i + j*lda + 1 + 0*lda] = c01; - C[i + j*lda + 0 + 1*lda] = c10; C[i + j*lda + 1 + 1*lda] = c11; - C[i + j*lda + 0 + 2*lda] = c20; C[i + j*lda + 1 + 2*lda] = c21; - C[i + j*lda + 0 + 3*lda] = c30; C[i + j*lda + 1 + 3*lda] = c31; - } - } -} - -void __attribute__((noinline)) matmul_mi(const int lda, const data_t A[], const data_t B[], data_t C[] ) { - int i, j, k; - - int curhalf = coreid; - for (i = 0; i < lda; i += 2) { - for (j = coreid * (lda / ncores); j < (coreid + 1) * (lda / ncores); j += 4) { - register float c00 = 0, c01 = 0; - register float c10 = 0, c11 = 0; - register float c20 = 0, c21 = 0; - register float c30 = 0, c31 = 0; - - register float a0, a1, a2, a3, b0, b1; - for (k = curhalf * (lda/2); k < curhalf * (lda/2) + (lda/2); k++) { - a0 = A[j*lda + k + 0*lda]; - a1 = A[j*lda + k + 1*lda]; - a2 = A[j*lda + k + 2*lda]; - a3 = A[j*lda + k + 3*lda]; - - b0 = B[k*lda + i + 0]; - b1 = B[k*lda + i + 1]; - - c00 += a0 * b0; c01 += a0 * b1; - c10 += a1 * b0; c11 += a1 * b1; - c20 += a2 * b0; c21 += a2 * b1; - c30 += a3 * b0; c31 += a3 * b1; - } - - C[i + j*lda + 0 + 0*lda] += c00; C[i + j*lda + 1 + 0*lda] += c01; - C[i + j*lda + 0 + 1*lda] += c10; C[i + j*lda + 1 + 1*lda] += c11; - C[i + j*lda + 0 + 2*lda] += c20; C[i + j*lda + 1 + 2*lda] += c21; - C[i + j*lda + 0 + 3*lda] += c30; C[i + j*lda + 1 + 3*lda] += c31; - } - } - - barrier(nc); - curhalf++; - curhalf %= ncores; - - for (i = 0; i < lda; i += 2) { - for (j = coreid * (lda / ncores); j < (coreid + 1) * (lda / ncores); j += 4) { - register float c00 = 0, c01 = 0; - register float c10 = 0, c11 = 0; - register float c20 = 0, c21 = 0; - register float c30 = 0, c31 = 0; - - register float a0, a1, a2, a3, b0, b1; - for (k = curhalf * (lda/2); k < curhalf * (lda/2) + (lda/2); k++) { - a0 = A[j*lda + k 
+ 0*lda]; - a1 = A[j*lda + k + 1*lda]; - a2 = A[j*lda + k + 2*lda]; - a3 = A[j*lda + k + 3*lda]; - - b0 = B[k*lda + i + 0]; - b1 = B[k*lda + i + 1]; - - c00 += a0 * b0; c01 += a0 * b1; - c10 += a1 * b0; c11 += a1 * b1; - c20 += a2 * b0; c21 += a2 * b1; - c30 += a3 * b0; c31 += a3 * b1; - } - - C[i + j*lda + 0 + 0*lda] += c00; C[i + j*lda + 1 + 0*lda] += c01; - C[i + j*lda + 0 + 1*lda] += c10; C[i + j*lda + 1 + 1*lda] += c11; - C[i + j*lda + 0 + 2*lda] += c20; C[i + j*lda + 1 + 2*lda] += c21; - C[i + j*lda + 0 + 3*lda] += c30; C[i + j*lda + 1 + 3*lda] += c31; - } - } -} - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - matmul_mi(lda, A, B, C); -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bb_vvadd/bb_vvadd.c b/mt/bb_vvadd/bb_vvadd.c deleted file mode 100755 index 664ce35..0000000 
--- a/mt/bb_vvadd/bb_vvadd.c +++ /dev/null @@ -1,167 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED 
test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - for (i = coreid * (n / ncores); i < (coreid + 1) * (n / ncores); i++) { - x[i] = x[i] + y[i]; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // 
verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bb_vvadd/dataset.h b/mt/bb_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/bb_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 
14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 
8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 
4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 
9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 
6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 
10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 
6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 
13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 
20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 
17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/bb_vvadd/vvadd_gendata.pl b/mt/bb_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/bb_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. 
-# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - 
-#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/bc_matmul.c b/mt/bc_matmul.c new file mode 100755 index 0000000..61c4054 --- /dev/null +++ b/mt/bc_matmul.c @@ -0,0 +1,137 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" + +#define REG_I 8 +#define REG_J 2 +//#define BLOCK_I 32 +#define BLOCK_J 16 +#define BLOCK_K 16 +#define LDA 32 +#define NCORES 2 +#define MIN(X,Y) (X < Y ? X : Y) + +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + // ***************************** // + // **** ADD YOUR CODE HERE ***** // + // ***************************** // + // + // feel free to make a separate function for MI and MSI versions. + + int i, j, k, ri, rj, ii, jj, kk; + data_t *Aj, *Cj, *Bi; + data_t c[REG_I][REG_J], a[REG_J], b[REG_I]; + size_t start = coreid * (LDA / NCORES), end = (coreid == NCORES - 1 ? 
LDA : (coreid + 1) * (LDA / NCORES)); + + /* if (coreid > 0) { */ + /* return; */ + /* } */ + /* start = 0, end = lda; */ + if (ncores == NCORES && lda == LDA) { + for (jj = start; jj < end; jj += BLOCK_J) + for (kk = 0; kk < LDA; kk += BLOCK_K) + //for (ii = 0; ii < LDA; ii += BLOCK_I) + for (j = jj; j < MIN(end, jj + BLOCK_J); j += REG_J) { + Aj = A + j*LDA; + Cj = C + j*LDA; + for (i = 0; i < LDA; i += REG_I) { + /* Load C in register blocks. */ + Bi = B + i; + for (ri = 0; ri < REG_I; ri++) { + for (rj = 0; rj < REG_J; rj++) { + c[ri][rj] = Cj[i + ri + ( rj)*LDA]; + } + } + + + for (k = kk; k < MIN(LDA, kk + BLOCK_K); k++) { + /* Load a,b in register blocks. */ + /* for (rj = 0; rj < REG_J; rj++) { + a[rj] = A[(j + rj)*LDA + k]; + }*/ + /* for (ri = 0; ri < REG_I; ri++) { */ + /* b[ri] = Bi[k*LDA + ri]; */ + /* } */ + /* /\* Compute C in register blocks. *\/ */ + /* for (rj = 0; rj < REG_J; rj++) { */ + /* a[rj] = Aj[( rj)*LDA + k]; */ + /* for (ri = 0; ri < REG_I; ri++) { */ + /* c[ri][rj] += a[rj] * b[ri]; */ + /* } */ + /* } */ + a[0] = Aj[k]; + a[1] = Aj[k + LDA]; + b[0] = Bi[k*LDA]; + b[1] = Bi[k*LDA + 1]; + b[2] = Bi[k*LDA + 2]; + b[3] = Bi[k*LDA + 3]; + b[4] = Bi[k*LDA + 4]; + b[5] = Bi[k*LDA + 5]; + b[6] = Bi[k*LDA + 6]; + b[7] = Bi[k*LDA + 7]; + + + c[0][0] += b[0] * a[0]; + c[0][1] += b[0] * a[1]; + c[1][0] += b[1] * a[0]; + c[1][1] += b[1] * a[1]; + c[2][0] += b[2] * a[0]; + c[2][1] += b[2] * a[1]; + c[3][0] += b[3] * a[0]; + c[3][1] += b[3] * a[1]; + c[4][0] += b[4] * a[0]; + c[4][1] += b[4] * a[1]; + c[5][0] += b[5] * a[0]; + c[5][1] += b[5] * a[1]; + c[6][0] += b[6] * a[0]; + c[6][1] += b[6] * a[1]; + c[7][0] += b[7] * a[0]; + c[7][1] += b[7] * a[1]; + + + /* c[0][0] += b[0] * a[0]; */ + /* c[1][1] += b[1] * a[1]; */ + /* c[2][0] += b[2] * a[0]; */ + /* c[3][1] += b[3] * a[1]; */ + /* c[4][0] += b[4] * a[0]; */ + /* c[5][1] += b[5] * a[1]; */ + /* c[6][0] += b[6] * a[0]; */ + /* c[7][1] += b[7] * a[1]; */ + /* c[0][0] += b[0] * a[0]; */ + /* 
c[1][1] += b[1] * a[1]; */ + /* c[2][0] += b[2] * a[0]; */ + /* c[3][1] += b[3] * a[1]; */ + /* c[4][0] += b[4] * a[0]; */ + /* c[5][1] += b[5] * a[1]; */ + /* c[6][0] += b[6] * a[0]; */ + /* c[7][1] += b[7] * a[1]; */ + + } + + /* store C in register blocks. */ + for (ri = 0; ri < REG_I; ri++) { + for (rj = 0; rj < REG_J; rj++) { + Cj[i + ri + (rj)*LDA] = c[ri][rj]; + } + } + } + + + + + } + /* We only care about performance for 32x32 matrices and 2 cores. Otherwise just naive mat_mul */ + } else { + if (coreid > 0) + return; + + for ( i = 0; i < lda; i++ ) + for ( j = 0; j < lda; j++ ) + for ( k = 0; k < lda; k++ ) + C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; + } +} diff --git a/mt/bc_matmul/bc_matmul.c b/mt/bc_matmul/bc_matmul.c deleted file mode 100755 index bac98cb..0000000 --- a/mt/bc_matmul/bc_matmul.c +++ /dev/null @@ -1,287 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - -#define REG_I 8 -#define REG_J 2 -//#define BLOCK_I 32 -#define BLOCK_J 16 -#define BLOCK_K 16 -#define LDA 32 -#define NCORES 2 -#define MIN(X,Y) (X < Y ? 
X : Y) - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - 
// ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - - int i, j, k, ri, rj, ii, jj, kk; - data_t *Aj, *Cj, *Bi; - data_t c[REG_I][REG_J], a[REG_J], b[REG_I]; - size_t start = coreid * (LDA / NCORES), end = (coreid == NCORES - 1 ? LDA : (coreid + 1) * (LDA / NCORES)); - - /* if (coreid > 0) { */ - /* return; */ - /* } */ - /* start = 0, end = lda; */ - if (ncores == NCORES && lda == LDA) { - for (jj = start; jj < end; jj += BLOCK_J) - for (kk = 0; kk < LDA; kk += BLOCK_K) - //for (ii = 0; ii < LDA; ii += BLOCK_I) - for (j = jj; j < MIN(end, jj + BLOCK_J); j += REG_J) { - Aj = A + j*LDA; - Cj = C + j*LDA; - for (i = 0; i < LDA; i += REG_I) { - /* Load C in register blocks. */ - Bi = B + i; - for (ri = 0; ri < REG_I; ri++) { - for (rj = 0; rj < REG_J; rj++) { - c[ri][rj] = Cj[i + ri + ( rj)*LDA]; - } - } - - - for (k = kk; k < MIN(LDA, kk + BLOCK_K); k++) { - /* Load a,b in register blocks. */ - /* for (rj = 0; rj < REG_J; rj++) { - a[rj] = A[(j + rj)*LDA + k]; - }*/ - /* for (ri = 0; ri < REG_I; ri++) { */ - /* b[ri] = Bi[k*LDA + ri]; */ - /* } */ - /* /\* Compute C in register blocks. 
*\/ */ - /* for (rj = 0; rj < REG_J; rj++) { */ - /* a[rj] = Aj[( rj)*LDA + k]; */ - /* for (ri = 0; ri < REG_I; ri++) { */ - /* c[ri][rj] += a[rj] * b[ri]; */ - /* } */ - /* } */ - a[0] = Aj[k]; - a[1] = Aj[k + LDA]; - b[0] = Bi[k*LDA]; - b[1] = Bi[k*LDA + 1]; - b[2] = Bi[k*LDA + 2]; - b[3] = Bi[k*LDA + 3]; - b[4] = Bi[k*LDA + 4]; - b[5] = Bi[k*LDA + 5]; - b[6] = Bi[k*LDA + 6]; - b[7] = Bi[k*LDA + 7]; - - - c[0][0] += b[0] * a[0]; - c[0][1] += b[0] * a[1]; - c[1][0] += b[1] * a[0]; - c[1][1] += b[1] * a[1]; - c[2][0] += b[2] * a[0]; - c[2][1] += b[2] * a[1]; - c[3][0] += b[3] * a[0]; - c[3][1] += b[3] * a[1]; - c[4][0] += b[4] * a[0]; - c[4][1] += b[4] * a[1]; - c[5][0] += b[5] * a[0]; - c[5][1] += b[5] * a[1]; - c[6][0] += b[6] * a[0]; - c[6][1] += b[6] * a[1]; - c[7][0] += b[7] * a[0]; - c[7][1] += b[7] * a[1]; - - - /* c[0][0] += b[0] * a[0]; */ - /* c[1][1] += b[1] * a[1]; */ - /* c[2][0] += b[2] * a[0]; */ - /* c[3][1] += b[3] * a[1]; */ - /* c[4][0] += b[4] * a[0]; */ - /* c[5][1] += b[5] * a[1]; */ - /* c[6][0] += b[6] * a[0]; */ - /* c[7][1] += b[7] * a[1]; */ - /* c[0][0] += b[0] * a[0]; */ - /* c[1][1] += b[1] * a[1]; */ - /* c[2][0] += b[2] * a[0]; */ - /* c[3][1] += b[3] * a[1]; */ - /* c[4][0] += b[4] * a[0]; */ - /* c[5][1] += b[5] * a[1]; */ - /* c[6][0] += b[6] * a[0]; */ - /* c[7][1] += b[7] * a[1]; */ - - } - - /* store C in register blocks. */ - for (ri = 0; ri < REG_I; ri++) { - for (rj = 0; rj < REG_J; rj++) { - Cj[i + ri + (rj)*LDA] = c[ri][rj]; - } - } - } - - - - - } - /* We only care about performance for 32x32 matrices and 2 cores. Otherwise just naive mat_mul */ - } else { - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - for ( k = 0; k < lda; k++ ) - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). 
Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// /* // Execute the provided, naive matmul */ -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bc_matmul/dataset.h b/mt/bc_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/bc_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 
1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 
0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 
3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 
77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 
72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - 
diff --git a/mt/bc_matmul/matmul_gendata.pl b/mt/bc_matmul/matmul_gendata.pl deleted file mode 100755 index f21bb46..0000000 --- a/mt/bc_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - 
print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/bc_matmul/matmul_mi.c b/mt/bc_matmul/matmul_mi.c deleted file mode 100755 index 35abdc8..0000000 --- a/mt/bc_matmul/matmul_mi.c +++ /dev/null @@ -1,318 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - -#define REG_I 8 -#define REG_J 2 -#define BLOCK_I 32 -#define BLOCK_J 16 -#define BLOCK_K 16 -#define LDA 32 -#define NCORES 2 -#define MIN(X,Y) (X < Y ? 
X : Y) - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - 
// ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - - int i, j, k, ri, rj, ii, jj, kk; - data_t *Aj, *Cj, *Bi; - data_t c[REG_I][REG_J], a[REG_J], b[REG_I]; - size_t start = coreid * (LDA / NCORES), end = (coreid == NCORES - 1 ? LDA : (coreid + 1) * (LDA / NCORES)); - - /* if (coreid > 0) { */ - /* return; */ - /* } */ - /* start = 0, end = lda; */ - if (ncores == NCORES && lda == LDA) { - for (jj = start; jj < end; jj += BLOCK_J) { - int kk_start= (coreid == 0 ? 0 : LDA/2) ,kk_end = (coreid == 0 ? LDA/2 : LDA); - for (kk = kk_start; kk < kk_end; kk += BLOCK_K) { - // for (ii = 0; ii < LDA; ii += BLOCK_I) - for (j = jj; j < MIN(end, jj + BLOCK_J); j += REG_J) { - Aj = A + j*LDA; - Cj = C + j*LDA; - for (i = 0; i < LDA/*, ii + BLOCK_I)*/; i += REG_I) { - /* Load C in register blocks. */ - Bi = B + i; - for (ri = 0; ri < REG_I; ri++) { - for (rj = 0; rj < REG_J; rj++) { - c[ri][rj] = Cj[i + ri + ( rj)*LDA]; - } - } - - - for (k = kk; k < MIN(LDA, kk + BLOCK_K); k++) { - for (ri = 0; ri < REG_I; ri++) { - b[ri] = Bi[k*LDA + ri]; - } - /* Compute C in register blocks. */ - for (rj = 0; rj < REG_J; rj++) { - a[rj] = Aj[(rj)*LDA + k]; - for (ri = 0; ri < REG_I; ri++) { - c[ri][rj] += a[rj] * b[ri]; - } - } - } - - /* store C in register blocks. */ - for (ri = 0; ri < REG_I; ri++) { - for (rj = 0; rj < REG_J; rj++) { - Cj[i + ri + ( rj)*LDA] = c[ri][rj]; - } - } - } - } - /* barrier(nc); */ - - /* kk_start= (coreid == 1 ? 0 : LDA/2); */ - /* kk_end = (coreid == 1 ? LDA/2 : LDA); */ - /* for (kk = kk_start; kk < kk_end; kk += BLOCK_K) { */ - /* // for (ii = 0; ii < LDA; ii += BLOCK_I) */ - /* for (j = jj; j < MIN(end, jj + BLOCK_J); j += REG_J) { */ - /* Aj = A + j*LDA; */ - /* Cj = C + j*LDA; */ - /* for (i = 0; i < LDA/\*, ii + BLOCK_I)*\/; i += REG_I) { */ - /* /\* Load C in register blocks. 
*\/ */ - /* Bi = B + i; */ - /* for (ri = 0; ri < REG_I; ri++) { */ - /* for (rj = 0; rj < REG_J; rj++) { */ - /* c[ri][rj] = Cj[i + ri + ( rj)*LDA]; */ - /* } */ - /* } */ - - - /* for (k = kk; k < MIN(LDA, kk + BLOCK_K); k++) { */ - /* for (ri = 0; ri < REG_I; ri++) { */ - /* b[ri] = Bi[k*LDA + ri]; */ - /* } */ - /* /\* Compute C in register blocks. *\/ */ - /* for (rj = 0; rj < REG_J; rj++) { */ - /* a[rj] = Aj[(rj)*LDA + k]; */ - /* for (ri = 0; ri < REG_I; ri++) { */ - /* c[ri][rj] += a[rj] * b[ri]; */ - /* } */ - /* } */ - /* } */ - - /* store C in register blocks. */ - /* for (ri = 0; ri < REG_I; ri++) { */ - /* for (rj = 0; rj < REG_J; rj++) { */ - /* Cj[i + ri + ( rj)*LDA] = c[ri][rj]; */ - /* } */ - /* } */ - /* } */ - /* } */ - } - } - - - //barrier(nc); - for (jj = start; jj < end; jj += BLOCK_J) { - int kk_start= (coreid != 0 ? 0 : LDA/2), kk_end = (coreid != 0 ? LDA/2 : LDA); - for (kk = kk_start; kk < kk_end; kk += BLOCK_K) { - // for (ii = 0; ii < LDA; ii += BLOCK_I) - for (j = jj; j < MIN(end, jj + BLOCK_J); j += REG_J) { - Aj = A + j*LDA; - Cj = C + j*LDA; - for (i = 0; i < LDA/*, ii + BLOCK_I)*/; i += REG_I) { - /* Load C in register blocks. */ - Bi = B + i; - for (ri = 0; ri < REG_I; ri++) { - for (rj = 0; rj < REG_J; rj++) { - c[ri][rj] = Cj[i + ri + ( rj)*LDA]; - } - } - - - for (k = kk; k < MIN(LDA, kk + BLOCK_K); k++) { - for (ri = 0; ri < REG_I; ri++) { - b[ri] = Bi[k*LDA + ri]; - } - /* Compute C in register blocks. */ - for (rj = 0; rj < REG_J; rj++) { - a[rj] = Aj[(rj)*LDA + k]; - for (ri = 0; ri < REG_I; ri++) { - c[ri][rj] += a[rj] * b[ri]; - } - } - } - - /* store C in register blocks. */ - for (ri = 0; ri < REG_I; ri++) { - for (rj = 0; rj < REG_J; rj++) { - Cj[i + ri + ( rj)*LDA] = c[ri][rj]; - } - } - } - } - } - } - /* We only care about performance for 32x32 matrices and 2 cores. 
Otherwise just naive mat_mul */ -} else { - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - for ( k = 0; k < lda; k++ ) - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// /* // Execute the provided, naive matmul */ -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bc_vvadd/bc_vvadd.c b/mt/bc_vvadd/bc_vvadd.c deleted file mode 100755 index c7af6b8..0000000 --- a/mt/bc_vvadd/bc_vvadd.c +++ /dev/null @@ -1,172 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. 
The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - -#define MIN(X,Y) (X < Y ? X : Y) -#define MAX(X,Y) (X > Y ? X : Y) - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ 
y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - size_t i, start = coreid * (n / ncores), end = (coreid == ncores - 1 ? n : (coreid + 1) * (n / ncores)); - - for (i = start; i < end; i++) { - x[i] += y[i]; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bc_vvadd/dataset.h b/mt/bc_vvadd/dataset.h deleted file mode 100755 
index ce9f936..0000000 --- a/mt/bc_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 
10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 
5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 
0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 
16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 
15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 
6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 
16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 
31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 
12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 
16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/bc_vvadd/vvadd_gendata.pl b/mt/bc_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/bc_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = 
scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." 
\n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/be_matmul/be_matmul.c b/mt/be_matmul/be_matmul.c deleted file mode 100755 index e8bff41..0000000 --- a/mt/be_matmul/be_matmul.c +++ /dev/null @@ -1,314 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i 
= 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. 
- - int i, j, k , jj , kk; - int start_i = coreid*lda/2; - int end_i = start_i + lda/2; - int step_j, step_k; - int start_k, end_k, start_j, end_j; - int j_lda; - int pos_A , pos_B, pos_C; - data_t temp00, temp01,temp02,temp03,temp04,temp05,temp06,temp07; - data_t temp10, temp11,temp12,temp13,temp14,temp15,temp16,temp17; - data_t temp_A0, temp_A1, temp_A2, temp_A3, temp_A4, temp_A5, temp_A6, temp_A7; - - temp00 = 0; - temp01 = 0; - temp02 = 0; - temp03 = 0; - temp04 = 0; - temp05 = 0; - temp06 = 0; - temp07 = 0; - - temp10 = 0; - temp11 = 0; - temp12 = 0; - temp13 = 0; - temp14 = 0; - temp15 = 0; - temp16 = 0; - temp17 = 0; - - if (coreid == 0) - { - step_k = 1; - start_k= 0; - end_k = lda; - - step_j = 2; - start_j= 0; - end_j = lda; - - }else - { - - step_k = -1; - start_k = lda-1; - end_k = -1; - - step_j = -2; - start_j= lda-2; - end_j = -2; - } - - for( kk = start_k ; kk!= end_k ; kk+=(step_k*16) ) - { - for( jj = start_j ; jj!= end_j ; jj+=(step_j*8) ) - { - for ( i = start_i; i < end_i; i+=8 ) - { - //pos_C = i + jj*lda; - for ( j = jj; j != (jj+(step_j*8)) ; j+=step_j ) - { - - pos_C = i + j*lda; - temp00 = C[(pos_C + 0)]; - temp01 = C[(pos_C + 1)]; - temp02 = C[(pos_C + 2)]; - temp03 = C[(pos_C + 3)]; - temp04 = C[(pos_C + 4)]; - temp05 = C[(pos_C + 5)]; - temp06 = C[(pos_C + 6)]; - temp07 = C[(pos_C + 7)]; - - //pos_C += lda; - pos_C = i + (j+1)*lda; - - temp10 = C[(pos_C + 0)]; - temp11 = C[(pos_C + 1)]; - temp12 = C[(pos_C + 2)]; - temp13 = C[(pos_C + 3)]; - temp14 = C[(pos_C + 4)]; - temp15 = C[(pos_C + 5)]; - temp16 = C[(pos_C + 6)]; - temp17 = C[(pos_C + 7)]; - - pos_B = kk*lda + i; - pos_A = j*lda + kk; - for ( k = kk; k != (kk+(step_k*16)) ; k+=step_k ) - { - temp_A0 = A[ pos_A ] ; - temp_A1 = A[pos_A +lda]; - - temp00 += temp_A0 * B[(pos_B + 0)]; - temp01 += temp_A0 * B[(pos_B + 1)]; - temp02 += temp_A0 * B[(pos_B + 2)]; - temp03 += temp_A0 * B[(pos_B + 3)]; - temp04 += temp_A0 * B[(pos_B + 4)]; - temp05 += temp_A0 * B[(pos_B + 5)]; - temp06 += 
temp_A0 * B[(pos_B + 6)]; - temp07 += temp_A0 * B[(pos_B + 7)]; - - temp10 += temp_A1 * B[(pos_B + 0)]; - temp11 += temp_A1 * B[(pos_B + 1)]; - temp12 += temp_A1 * B[(pos_B + 2)]; - temp13 += temp_A1 * B[(pos_B + 3)]; - temp14 += temp_A1 * B[(pos_B + 4)]; - temp15 += temp_A1 * B[(pos_B + 5)]; - temp16 += temp_A1 * B[(pos_B + 6)]; - temp17 += temp_A1 * B[(pos_B + 7)]; - - pos_B += (lda*step_k) ; - pos_A += step_k; - } - //barrier(nc); - - C[(pos_C + 0)] = temp10; - C[(pos_C + 1)] = temp11; - C[(pos_C + 2)] = temp12; - C[(pos_C + 3)] = temp13; - C[(pos_C + 4)] = temp14; - C[(pos_C + 5)] = temp15; - C[(pos_C + 6)] = temp16; - C[(pos_C + 7)] = temp17; - //barrier(nc); - - pos_C = i + j*lda; - //pos_C -= lda; - C[(pos_C + 0)] = temp00; - C[(pos_C + 1)] = temp01; - C[(pos_C + 2)] = temp02; - C[(pos_C + 3)] = temp03; - C[(pos_C + 4)] = temp04; - C[(pos_C + 5)] = temp05; - C[(pos_C + 6)] = temp06; - C[(pos_C + 7)] = temp07; - //barrier(nc); - //pos_C += step_j * lda; - } - //barrier(nc); - } - //barrier(nc); - - } - //barrier(nc); - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - /* - // Execute the provided, naive matmul - barrier(nc); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - - // clear results from the first trial - size_t i; - if (coreid == 0) - for (i=0; i < ARRAY_SIZE; i++) - results_data[i] = 0; - barrier(nc); - - */ - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - - //printf("input1_data"); -exit(0); - -} diff --git a/mt/be_matmul/dataset.h b/mt/be_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/be_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 
0, 3, 2, 1, 3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 
1, 0, - 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 
3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 
67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 
83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/be_matmul/matmul_gendata.pl b/mt/be_matmul/matmul_gendata.pl deleted file mode 
100755 index f21bb46..0000000 --- a/mt/be_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print 
sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/be_matmul/matmul_mi.c b/mt/be_matmul/matmul_mi.c deleted file mode 100755 index e8bff41..0000000 --- a/mt/be_matmul/matmul_mi.c +++ /dev/null @@ -1,314 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for 
( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. 
- - int i, j, k , jj , kk; - int start_i = coreid*lda/2; - int end_i = start_i + lda/2; - int step_j, step_k; - int start_k, end_k, start_j, end_j; - int j_lda; - int pos_A , pos_B, pos_C; - data_t temp00, temp01,temp02,temp03,temp04,temp05,temp06,temp07; - data_t temp10, temp11,temp12,temp13,temp14,temp15,temp16,temp17; - data_t temp_A0, temp_A1, temp_A2, temp_A3, temp_A4, temp_A5, temp_A6, temp_A7; - - temp00 = 0; - temp01 = 0; - temp02 = 0; - temp03 = 0; - temp04 = 0; - temp05 = 0; - temp06 = 0; - temp07 = 0; - - temp10 = 0; - temp11 = 0; - temp12 = 0; - temp13 = 0; - temp14 = 0; - temp15 = 0; - temp16 = 0; - temp17 = 0; - - if (coreid == 0) - { - step_k = 1; - start_k= 0; - end_k = lda; - - step_j = 2; - start_j= 0; - end_j = lda; - - }else - { - - step_k = -1; - start_k = lda-1; - end_k = -1; - - step_j = -2; - start_j= lda-2; - end_j = -2; - } - - for( kk = start_k ; kk!= end_k ; kk+=(step_k*16) ) - { - for( jj = start_j ; jj!= end_j ; jj+=(step_j*8) ) - { - for ( i = start_i; i < end_i; i+=8 ) - { - //pos_C = i + jj*lda; - for ( j = jj; j != (jj+(step_j*8)) ; j+=step_j ) - { - - pos_C = i + j*lda; - temp00 = C[(pos_C + 0)]; - temp01 = C[(pos_C + 1)]; - temp02 = C[(pos_C + 2)]; - temp03 = C[(pos_C + 3)]; - temp04 = C[(pos_C + 4)]; - temp05 = C[(pos_C + 5)]; - temp06 = C[(pos_C + 6)]; - temp07 = C[(pos_C + 7)]; - - //pos_C += lda; - pos_C = i + (j+1)*lda; - - temp10 = C[(pos_C + 0)]; - temp11 = C[(pos_C + 1)]; - temp12 = C[(pos_C + 2)]; - temp13 = C[(pos_C + 3)]; - temp14 = C[(pos_C + 4)]; - temp15 = C[(pos_C + 5)]; - temp16 = C[(pos_C + 6)]; - temp17 = C[(pos_C + 7)]; - - pos_B = kk*lda + i; - pos_A = j*lda + kk; - for ( k = kk; k != (kk+(step_k*16)) ; k+=step_k ) - { - temp_A0 = A[ pos_A ] ; - temp_A1 = A[pos_A +lda]; - - temp00 += temp_A0 * B[(pos_B + 0)]; - temp01 += temp_A0 * B[(pos_B + 1)]; - temp02 += temp_A0 * B[(pos_B + 2)]; - temp03 += temp_A0 * B[(pos_B + 3)]; - temp04 += temp_A0 * B[(pos_B + 4)]; - temp05 += temp_A0 * B[(pos_B + 5)]; - temp06 += 
temp_A0 * B[(pos_B + 6)]; - temp07 += temp_A0 * B[(pos_B + 7)]; - - temp10 += temp_A1 * B[(pos_B + 0)]; - temp11 += temp_A1 * B[(pos_B + 1)]; - temp12 += temp_A1 * B[(pos_B + 2)]; - temp13 += temp_A1 * B[(pos_B + 3)]; - temp14 += temp_A1 * B[(pos_B + 4)]; - temp15 += temp_A1 * B[(pos_B + 5)]; - temp16 += temp_A1 * B[(pos_B + 6)]; - temp17 += temp_A1 * B[(pos_B + 7)]; - - pos_B += (lda*step_k) ; - pos_A += step_k; - } - //barrier(nc); - - C[(pos_C + 0)] = temp10; - C[(pos_C + 1)] = temp11; - C[(pos_C + 2)] = temp12; - C[(pos_C + 3)] = temp13; - C[(pos_C + 4)] = temp14; - C[(pos_C + 5)] = temp15; - C[(pos_C + 6)] = temp16; - C[(pos_C + 7)] = temp17; - //barrier(nc); - - pos_C = i + j*lda; - //pos_C -= lda; - C[(pos_C + 0)] = temp00; - C[(pos_C + 1)] = temp01; - C[(pos_C + 2)] = temp02; - C[(pos_C + 3)] = temp03; - C[(pos_C + 4)] = temp04; - C[(pos_C + 5)] = temp05; - C[(pos_C + 6)] = temp06; - C[(pos_C + 7)] = temp07; - //barrier(nc); - //pos_C += step_j * lda; - } - //barrier(nc); - } - //barrier(nc); - - } - //barrier(nc); - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - /* - // Execute the provided, naive matmul - barrier(nc); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - - // clear results from the first trial - size_t i; - if (coreid == 0) - for (i=0; i < ARRAY_SIZE; i++) - results_data[i] = 0; - barrier(nc); - - */ - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - - //printf("input1_data"); -exit(0); - -} diff --git a/mt/be_vvadd/be_vvadd.c b/mt/be_vvadd/be_vvadd.c deleted file mode 100755 index b1bf72b..0000000 --- a/mt/be_vvadd/be_vvadd.c +++ /dev/null @@ -1,171 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // 
***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - size_t i; - size_t index; - for (i = 0; i < (n/ncores); i++){ - index = i + coreid*(n/ncores); - x[index] = x[index] + y[index]; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/be_vvadd/dataset.h b/mt/be_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/be_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 
18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 
14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 
6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 
18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 
18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 
1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 
2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 
23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 
27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 
3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 
26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/be_vvadd/vvadd_gendata.pl b/mt/be_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/be_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } 
- } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/bf_matmul.c b/mt/bf_matmul.c new file mode 100644 index 0000000..04904b9 --- /dev/null +++ b/mt/bf_matmul.c @@ -0,0 +1,127 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + // ***************************** // + // **** ADD YOUR CODE HERE ***** // + // ***************************** // + // + // feel free to make a separate function for MI and MSI versions. 
+ int j, k, i; + data_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; + data_t temp8, temp9, temp10, temp11, temp12, temp13, temp14, temp15; + if(coreid == 0) { + for(j = 0; j < 32; j++) { + temp0 = 0; //C[j*lda]; + temp1 = 0; //C[1 + j*lda]; + temp2 = 0; //C[2 + j*lda]; + temp3 = 0; //C[3 + j*lda]; + temp4 = 0; //C[4 + j*lda]; + temp5 = 0; //C[5 + j*lda]; + temp6 = 0; //C[6 + j*lda]; + temp7 = 0; //C[7 + j*lda]; + temp8 = 0; //C[8 + j*lda]; + temp9 = 0; //C[9 + j*lda]; + temp10 = 0; //C[10 + j*lda]; + temp11 = 0; //C[11 + j*lda]; + temp12 = 0; //C[12 + j*lda]; + temp13 = 0; //C[13 + j*lda]; + temp14 = 0; //C[14 + j*lda]; + temp15 = 0; //C[15 + j*lda]; + for(k = 0; k < 32; k++) { + temp0 += A[j*lda + k] * B[k*lda]; + temp1 += A[j*lda + k] * B[1+k*lda]; + temp2 += A[j*lda + k] * B[2+k*lda]; + temp3 += A[j*lda + k] * B[3+k*lda]; + temp4 += A[j*lda + k] * B[4+k*lda]; + temp5 += A[j*lda + k] * B[5+k*lda]; + temp6 += A[j*lda + k] * B[6+k*lda]; + temp7 += A[j*lda + k] * B[7+k*lda]; + temp8 += A[j*lda + k] * B[8+k*lda]; + temp9 += A[j*lda + k] * B[9+k*lda]; + temp10 += A[j*lda + k] * B[10+k*lda]; + temp11 += A[j*lda + k] * B[11+k*lda]; + temp12 += A[j*lda + k] * B[12+k*lda]; + temp13 += A[j*lda + k] * B[13+k*lda]; + temp14 += A[j*lda + k] * B[14+k*lda]; + temp15 += A[j*lda + k] * B[15+k*lda]; + } + C[j*lda] = temp0; + C[1 + j*lda] = temp1; + C[2 + j*lda] = temp2; + C[3 + j*lda] = temp3; + C[4 + j*lda] = temp4; + C[5 + j*lda] = temp5; + C[6 + j*lda] = temp6; + C[7 + j*lda] = temp7; + C[8 + j*lda] = temp8; + C[9 + j*lda] = temp9; + C[10 + j*lda] = temp10; + C[11 + j*lda] = temp11; + C[12 + j*lda] = temp12; + C[13 + j*lda] = temp13; + C[14 + j*lda] = temp14; + C[15 + j*lda] = temp15; + } + } + + if(coreid == 1 || ncores == 1) { + for(j = 0; j < 32; j++) { + temp0 = 0; //C[16+j*lda]; + temp1 = 0; //C[17+j*lda]; + temp2 = 0; //C[18+j*lda]; + temp3 = 0; //C[19+j*lda]; + temp4 = 0; //C[20+j*lda]; + temp5 = 0; //C[21+j*lda]; + temp6 = 0; //C[22+j*lda]; + temp7 = 0; 
//C[23+j*lda]; + temp8 = 0; //C[24+j*lda]; + temp9 = 0; //C[25+j*lda]; + temp10 = 0; //C[26+j*lda]; + temp11 = 0; //C[27+j*lda]; + temp12 = 0; //C[28+j*lda]; + temp13 = 0; //C[29+j*lda]; + temp14 = 0; //C[30+j*lda]; + temp15 = 0; //C[31+j*lda]; + for(k = 0; k < 32; k++) { + temp0 += A[j*lda + k] * B[16+k*lda]; + temp1 += A[j*lda + k] * B[17+k*lda]; + temp2 += A[j*lda + k] * B[18+k*lda]; + temp3 += A[j*lda + k] * B[19+k*lda]; + temp4 += A[j*lda + k] * B[20+k*lda]; + temp5 += A[j*lda + k] * B[21+k*lda]; + temp6 += A[j*lda + k] * B[22+k*lda]; + temp7 += A[j*lda + k] * B[23+k*lda]; + temp8 += A[j*lda + k] * B[24+k*lda]; + temp9 += A[j*lda + k] * B[25+k*lda]; + temp10 += A[j*lda + k] * B[26+k*lda]; + temp11 += A[j*lda + k] * B[27+k*lda]; + temp12 += A[j*lda + k] * B[28+k*lda]; + temp13 += A[j*lda + k] * B[29+k*lda]; + temp14 += A[j*lda + k] * B[30+k*lda]; + temp15 += A[j*lda + k] * B[31+k*lda]; + } + C[16 + j*lda] = temp0; + C[17 + j*lda] = temp1; + C[18 + j*lda] = temp2; + C[19 + j*lda] = temp3; + C[20 + j*lda] = temp4; + C[21 + j*lda] = temp5; + C[22 + j*lda] = temp6; + C[23 + j*lda] = temp7; + C[24 + j*lda] = temp8; + C[25 + j*lda] = temp9; + C[26 + j*lda] = temp10; + C[27 + j*lda] = temp11; + C[28 + j*lda] = temp12; + C[29 + j*lda] = temp13; + C[30 + j*lda] = temp14; + C[31 + j*lda] = temp15; + } + } + +} diff --git a/mt/bf_matmul/bf_matmul.c b/mt/bf_matmul/bf_matmul.c deleted file mode 100644 index 24fa7e6..0000000 --- a/mt/bf_matmul/bf_matmul.c +++ /dev/null @@ -1,279 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. 
- - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + 
j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - int j, k, i; - data_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; - data_t temp8, temp9, temp10, temp11, temp12, temp13, temp14, temp15; - if(coreid == 0) { - for(j = 0; j < 32; j++) { - temp0 = 0; //C[j*lda]; - temp1 = 0; //C[1 + j*lda]; - temp2 = 0; //C[2 + j*lda]; - temp3 = 0; //C[3 + j*lda]; - temp4 = 0; //C[4 + j*lda]; - temp5 = 0; //C[5 + j*lda]; - temp6 = 0; //C[6 + j*lda]; - temp7 = 0; //C[7 + j*lda]; - temp8 = 0; //C[8 + j*lda]; - temp9 = 0; //C[9 + j*lda]; - temp10 = 0; //C[10 + j*lda]; - temp11 = 0; //C[11 + j*lda]; - temp12 = 0; //C[12 + j*lda]; - temp13 = 0; //C[13 + j*lda]; - temp14 = 0; //C[14 + j*lda]; - temp15 = 0; //C[15 + j*lda]; - for(k = 0; k < 32; k++) { - temp0 += A[j*lda + k] * B[k*lda]; - temp1 += A[j*lda + k] * B[1+k*lda]; - temp2 += A[j*lda + k] * B[2+k*lda]; - temp3 += A[j*lda + k] * B[3+k*lda]; - temp4 += A[j*lda + k] * B[4+k*lda]; - temp5 += A[j*lda + k] * B[5+k*lda]; - temp6 += A[j*lda + k] * B[6+k*lda]; - temp7 += A[j*lda + k] * B[7+k*lda]; - temp8 += A[j*lda + k] * B[8+k*lda]; - temp9 += A[j*lda + k] * B[9+k*lda]; - temp10 += A[j*lda + k] * B[10+k*lda]; - temp11 += A[j*lda + k] * B[11+k*lda]; - temp12 += A[j*lda + k] * B[12+k*lda]; - temp13 += A[j*lda + k] * B[13+k*lda]; - temp14 += A[j*lda + k] * B[14+k*lda]; - temp15 += A[j*lda + k] * B[15+k*lda]; - } - C[j*lda] = temp0; - C[1 + j*lda] = temp1; - C[2 + j*lda] = temp2; - C[3 + j*lda] = temp3; - C[4 + j*lda] = temp4; - C[5 + j*lda] = temp5; - C[6 + j*lda] = temp6; - C[7 + j*lda] = temp7; - C[8 + j*lda] = temp8; - C[9 + j*lda] = temp9; - C[10 + j*lda] = temp10; - C[11 + j*lda] = temp11; - C[12 + j*lda] = temp12; - C[13 + 
j*lda] = temp13; - C[14 + j*lda] = temp14; - C[15 + j*lda] = temp15; - } - } - - else { - for(j = 0; j < 32; j++) { - temp0 = 0; //C[16+j*lda]; - temp1 = 0; //C[17+j*lda]; - temp2 = 0; //C[18+j*lda]; - temp3 = 0; //C[19+j*lda]; - temp4 = 0; //C[20+j*lda]; - temp5 = 0; //C[21+j*lda]; - temp6 = 0; //C[22+j*lda]; - temp7 = 0; //C[23+j*lda]; - temp8 = 0; //C[24+j*lda]; - temp9 = 0; //C[25+j*lda]; - temp10 = 0; //C[26+j*lda]; - temp11 = 0; //C[27+j*lda]; - temp12 = 0; //C[28+j*lda]; - temp13 = 0; //C[29+j*lda]; - temp14 = 0; //C[30+j*lda]; - temp15 = 0; //C[31+j*lda]; - for(k = 0; k < 32; k++) { - temp0 += A[j*lda + k] * B[16+k*lda]; - temp1 += A[j*lda + k] * B[17+k*lda]; - temp2 += A[j*lda + k] * B[18+k*lda]; - temp3 += A[j*lda + k] * B[19+k*lda]; - temp4 += A[j*lda + k] * B[20+k*lda]; - temp5 += A[j*lda + k] * B[21+k*lda]; - temp6 += A[j*lda + k] * B[22+k*lda]; - temp7 += A[j*lda + k] * B[23+k*lda]; - temp8 += A[j*lda + k] * B[24+k*lda]; - temp9 += A[j*lda + k] * B[25+k*lda]; - temp10 += A[j*lda + k] * B[26+k*lda]; - temp11 += A[j*lda + k] * B[27+k*lda]; - temp12 += A[j*lda + k] * B[28+k*lda]; - temp13 += A[j*lda + k] * B[29+k*lda]; - temp14 += A[j*lda + k] * B[30+k*lda]; - temp15 += A[j*lda + k] * B[31+k*lda]; - } - C[16 + j*lda] = temp0; - C[17 + j*lda] = temp1; - C[18 + j*lda] = temp2; - C[19 + j*lda] = temp3; - C[20 + j*lda] = temp4; - C[21 + j*lda] = temp5; - C[22 + j*lda] = temp6; - C[23 + j*lda] = temp7; - C[24 + j*lda] = temp8; - C[25 + j*lda] = temp9; - C[26 + j*lda] = temp10; - C[27 + j*lda] = temp11; - C[28 + j*lda] = temp12; - C[29 + j*lda] = temp13; - C[30 + j*lda] = temp14; - C[31 + j*lda] = temp15; - } - } - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bf_matmul/dataset.h b/mt/bf_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/bf_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 
3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 
0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 
0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 
68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 
61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/bf_matmul/matmul_gendata.pl b/mt/bf_matmul/matmul_gendata.pl deleted file mode 100755 index 
f21bb46..0000000 --- a/mt/bf_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print 
sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/bf_matmul/matmul_mi.c b/mt/bf_matmul/matmul_mi.c deleted file mode 100755 index a063df0..0000000 --- a/mt/bf_matmul/matmul_mi.c +++ /dev/null @@ -1,392 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for 
( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. 
- int j, k; - data_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; - data_t temp8, temp9, temp10, temp11, temp12, temp13, temp14, temp15; - if(coreid == 0) { - //16*0:16*(0+1) ;; 16*1+16*(1+1) - //0:16 ;; 16:32 - - //complete Q1 - for(j = 0; j < 16; j++) { - temp0 = C[j*lda]; - temp1 = C[1 + j*lda]; - temp2 = C[2 + j*lda]; - temp3 = C[3 + j*lda]; - temp4 = C[4 + j*lda]; - temp5 = C[5 + j*lda]; - temp6 = C[6 + j*lda]; - temp7 = C[7 + j*lda]; - temp8 = C[8 + j*lda]; - temp9 = C[9 + j*lda]; - temp10 = C[10 + j*lda]; - temp11 = C[11 + j*lda]; - temp12 = C[12 + j*lda]; - temp13 = C[13 + j*lda]; - temp14 = C[14 + j*lda]; - temp15 = C[15 + j*lda]; - for(k = 0; k < 32; k++) { - temp0 += A[j*lda + k] * B[k*lda]; - temp1 += A[j*lda + k] * B[1+k*lda]; - temp2 += A[j*lda + k] * B[2+k*lda]; - temp3 += A[j*lda + k] * B[3+k*lda]; - temp4 += A[j*lda + k] * B[4+k*lda]; - temp5 += A[j*lda + k] * B[5+k*lda]; - temp6 += A[j*lda + k] * B[6+k*lda]; - temp7 += A[j*lda + k] * B[7+k*lda]; - temp8 += A[j*lda + k] * B[8+k*lda]; - temp9 += A[j*lda + k] * B[9+k*lda]; - temp10 += A[j*lda + k] * B[10+k*lda]; - temp11 += A[j*lda + k] * B[11+k*lda]; - temp12 += A[j*lda + k] * B[12+k*lda]; - temp13 += A[j*lda + k] * B[13+k*lda]; - temp14 += A[j*lda + k] * B[14+k*lda]; - temp15 += A[j*lda + k] * B[15+k*lda]; - } - C[j*lda] = temp0; - C[1 + j*lda] = temp1; - C[2 + j*lda] = temp2; - C[3 + j*lda] = temp3; - C[4 + j*lda] = temp4; - C[5 + j*lda] = temp5; - C[6 + j*lda] = temp6; - C[7 + j*lda] = temp7; - C[8 + j*lda] = temp8; - C[9 + j*lda] = temp9; - C[10 + j*lda] = temp10; - C[11 + j*lda] = temp11; - C[12 + j*lda] = temp12; - C[13 + j*lda] = temp13; - C[14 + j*lda] = temp14; - C[15 + j*lda] = temp15; - } - for(j = 16; j < 32; j++) { - temp0 = C[j*lda]; - temp1 = C[1 + j*lda]; - temp2 = C[2 + j*lda]; - temp3 = C[3 + j*lda]; - temp4 = C[4 + j*lda]; - temp5 = C[5 + j*lda]; - temp6 = C[6 + j*lda]; - temp7 = C[7 + j*lda]; - temp8 = C[8 + j*lda]; - temp9 = C[9 + j*lda]; - temp10 = C[10 + j*lda]; - 
temp11 = C[11 + j*lda]; - temp12 = C[12 + j*lda]; - temp13 = C[13 + j*lda]; - temp14 = C[14 + j*lda]; - temp15 = C[15 + j*lda]; - for(k = 0; k < 32; k++) { - temp0 += A[j*lda + k] * B[k*lda]; - temp1 += A[j*lda + k] * B[1+k*lda]; - temp2 += A[j*lda + k] * B[2+k*lda]; - temp3 += A[j*lda + k] * B[3+k*lda]; - temp4 += A[j*lda + k] * B[4+k*lda]; - temp5 += A[j*lda + k] * B[5+k*lda]; - temp6 += A[j*lda + k] * B[6+k*lda]; - temp7 += A[j*lda + k] * B[7+k*lda]; - temp8 += A[j*lda + k] * B[8+k*lda]; - temp9 += A[j*lda + k] * B[9+k*lda]; - temp10 += A[j*lda + k] * B[10+k*lda]; - temp11 += A[j*lda + k] * B[11+k*lda]; - temp12 += A[j*lda + k] * B[12+k*lda]; - temp13 += A[j*lda + k] * B[13+k*lda]; - temp14 += A[j*lda + k] * B[14+k*lda]; - temp15 += A[j*lda + k] * B[15+k*lda]; - } - C[j*lda] = temp0; - C[1 + j*lda] = temp1; - C[2 + j*lda] = temp2; - C[3 + j*lda] = temp3; - C[4 + j*lda] = temp4; - C[5 + j*lda] = temp5; - C[6 + j*lda] = temp6; - C[7 + j*lda] = temp7; - C[8 + j*lda] = temp8; - C[9 + j*lda] = temp9; - C[10 + j*lda] = temp10; - C[11 + j*lda] = temp11; - C[12 + j*lda] = temp12; - C[13 + j*lda] = temp13; - C[14 + j*lda] = temp14; - C[15 + j*lda] = temp15; - } - } - //16*(2-1) : 16*2 ;; 16*(1-1) : 16*1 - //16:32 ;; 0:16 - if(coreid == 1) { - //complete Q3 - for(j = 16; j < 32; j++) { - temp0 = C[16+j*lda]; - temp1 = C[17+j*lda]; - temp2 = C[18+j*lda]; - temp3 = C[19+j*lda]; - temp4 = C[20+j*lda]; - temp5 = C[21+j*lda]; - temp6 = C[22+j*lda]; - temp7 = C[23+j*lda]; - temp8 = C[24+j*lda]; - temp9 = C[25+j*lda]; - temp10 = C[26+j*lda]; - temp11 = C[27+j*lda]; - temp12 = C[28+j*lda]; - temp13 = C[29+j*lda]; - temp14 = C[30+j*lda]; - temp15 = C[31+j*lda]; - for(k = 0; k < 32; k++) { - temp0 += A[j*lda + k] * B[16+k*lda]; - temp1 += A[j*lda + k] * B[17+k*lda]; - temp2 += A[j*lda + k] * B[18+k*lda]; - temp3 += A[j*lda + k] * B[19+k*lda]; - temp4 += A[j*lda + k] * B[20+k*lda]; - temp5 += A[j*lda + k] * B[21+k*lda]; - temp6 += A[j*lda + k] * B[22+k*lda]; - temp7 += A[j*lda + k] 
* B[23+k*lda]; - temp8 += A[j*lda + k] * B[24+k*lda]; - temp9 += A[j*lda + k] * B[25+k*lda]; - temp10 += A[j*lda + k] * B[26+k*lda]; - temp11 += A[j*lda + k] * B[27+k*lda]; - temp12 += A[j*lda + k] * B[28+k*lda]; - temp13 += A[j*lda + k] * B[29+k*lda]; - temp14 += A[j*lda + k] * B[30+k*lda]; - temp15 += A[j*lda + k] * B[31+k*lda]; - } - C[16 + j*lda] = temp0; - C[17 + j*lda] = temp1; - C[18 + j*lda] = temp2; - C[19 + j*lda] = temp3; - C[20 + j*lda] = temp4; - C[21 + j*lda] = temp5; - C[22 + j*lda] = temp6; - C[23 + j*lda] = temp7; - C[24 + j*lda] = temp8; - C[25 + j*lda] = temp9; - C[26 + j*lda] = temp10; - C[27 + j*lda] = temp11; - C[28 + j*lda] = temp12; - C[29 + j*lda] = temp13; - C[30 + j*lda] = temp14; - C[31 + j*lda] = temp15; - } - //complete Q4 - for(j = 0; j < 16; j++) { - temp0 = C[16 + j*lda]; - temp1 = C[17 + j*lda]; - temp2 = C[18 + j*lda]; - temp3 = C[19 + j*lda]; - temp4 = C[20 + j*lda]; - temp5 = C[21 + j*lda]; - temp6 = C[22 + j*lda]; - temp7 = C[23 + j*lda]; - temp8 = C[24 + j*lda]; - temp9 = C[25 + j*lda]; - temp10 = C[26 + j*lda]; - temp11 = C[27 + j*lda]; - temp12 = C[28 + j*lda]; - temp13 = C[29 + j*lda]; - temp14 = C[30 + j*lda]; - temp15 = C[31 + j*lda]; - for(k = 0; k < 32; k++) { - temp0 += A[j*lda + k] * B[16 + k*lda]; - temp1 += A[j*lda + k] * B[17 + k*lda]; - temp2 += A[j*lda + k] * B[18 + k*lda]; - temp3 += A[j*lda + k] * B[19 + k*lda]; - temp4 += A[j*lda + k] * B[20 + k*lda]; - temp5 += A[j*lda + k] * B[21 + k*lda]; - temp6 += A[j*lda + k] * B[22 + k*lda]; - temp7 += A[j*lda + k] * B[23 + k*lda]; - temp8 += A[j*lda + k] * B[24 + k*lda]; - temp9 += A[j*lda + k] * B[25 + k*lda]; - temp10 += A[j*lda + k] * B[26 + k*lda]; - temp11 += A[j*lda + k] * B[27 + k*lda]; - temp12 += A[j*lda + k] * B[28 + k*lda]; - temp13 += A[j*lda + k] * B[29 + k*lda]; - temp14 += A[j*lda + k] * B[30 + k*lda]; - temp15 += A[j*lda + k] * B[31 + k*lda]; - } - C[16 + j*lda] = temp0; - C[17 + j*lda] = temp1; - C[18 + j*lda] = temp2; - C[19 + j*lda] = temp3; - C[20 + 
j*lda] = temp4; - C[21 + j*lda] = temp5; - C[22 + j*lda] = temp6; - C[23 + j*lda] = temp7; - C[24 + j*lda] = temp8; - C[25 + j*lda] = temp9; - C[26 + j*lda] = temp10; - C[27 + j*lda] = temp11; - C[28 + j*lda] = temp12; - C[29 + j*lda] = temp13; - C[30 + j*lda] = temp14; - C[31 + j*lda] = temp15; - } - } - - - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bf_vvadd/bf_vvadd.c b/mt/bf_vvadd/bf_vvadd.c deleted file mode 100755 index f783ee1..0000000 --- a/mt/bf_vvadd/bf_vvadd.c +++ /dev/null @@ -1,180 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third 
vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = 
coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - size_t i; - if (coreid == 0) - { - for (i = 0; i < n/2; i++) - { - x[i] = x[i] + y[i]; - } - } - if (coreid == 1) - { - for (i = n/2; i < n; i++) - { - x[i] = x[i] + y[i]; - } - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bf_vvadd/dataset.h b/mt/bf_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- 
a/mt/bf_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 
9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 
16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 
4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 
12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 
10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 
15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 
21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 
20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 
2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 
29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/bf_vvadd/vvadd_gendata.pl b/mt/bf_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/bf_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print 
"static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." 
\n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/bh_matmul.c b/mt/bh_matmul.c new file mode 100755 index 0000000..c8d6f2b --- /dev/null +++ b/mt/bh_matmul.c @@ -0,0 +1,97 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + // ***************************** // + // **** ADD YOUR CODE HERE ***** // + // ***************************** // + // + // feel free to make a separate function for MI and MSI versions. + if(coreid > 1) return; + + int m, i, j, k, iB0, iB1; + data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7; + data_t tempA0, tempA1; + + if (coreid == 0){ + for (m = 0; m < 2; m++){ + for (j = 0; j < lda/2; j++){ + for (i = 0; i < lda; i+=8){ + tempC0 = C[i + j*lda]; + tempC1 = C[i + j*lda+1]; + tempC2 = C[i + j*lda+2]; + tempC3 = C[i + j*lda+3]; + tempC4 = C[i + j*lda+4]; + tempC5 = C[i + j*lda+5]; + tempC6 = C[i + j*lda+6]; + tempC7 = C[i + j*lda+7]; + iB0 = m*lda*lda/2+i; + iB1 = iB0+lda; + for (k = m*lda/2; k < (m+1)*lda/2; k+=2){ + tempA0 = A[j*lda+k]; + tempA1 = A[j*lda+k+1]; + tempC0 += tempA0*B[iB0]+tempA1*B[iB1]; + tempC1 += tempA0*B[iB0+1]+tempA1*B[iB1+1]; + tempC2 += tempA0*B[iB0+2]+tempA1*B[iB1+2]; + tempC3 += tempA0*B[iB0+3]+tempA1*B[iB1+3]; + tempC4 += tempA0*B[iB0+4]+tempA1*B[iB1+4]; + tempC5 += tempA0*B[iB0+5]+tempA1*B[iB1+5]; + tempC6 += tempA0*B[iB0+6]+tempA1*B[iB1+6]; + tempC7 += tempA0*B[iB0+7]+tempA1*B[iB1+7]; + iB0 += 2*lda; + iB1 += 2*lda; + + } + C[i + j*lda] = tempC0; + C[i + j*lda + 1] = tempC1; + C[i + j*lda + 2] = tempC2; + C[i + j*lda + 3] = tempC3; + C[i + j*lda + 4] = tempC4; + C[i + j*lda + 5] = tempC5; + C[i + j*lda + 6] = tempC6; + C[i + j*lda + 7] = tempC7; + } + } + } + } + if(coreid == 1 || ncores == 1) { + for (m = 2; m > 0; m--){ + for 
(j = lda-1; j >= lda/2; j--){ + for (i = lda-1; i >= 0; i-=8){ + tempC0 = C[i + j*lda]; + tempC1 = C[i + j*lda - 1]; + tempC2 = C[i + j*lda - 2]; + tempC3 = C[i + j*lda - 3]; + tempC4 = C[i + j*lda - 4]; + tempC5 = C[i + j*lda - 5]; + tempC6 = C[i + j*lda - 6]; + tempC7 = C[i + j*lda - 7]; + for (k = m*lda/2-1; k >= (m-1)*lda/2; k-=2){ + tempA0 = A[j*lda+k]; + tempA1 = A[j*lda+k-1]; + tempC0 += tempA0*B[k*lda+i]+tempA1*B[(k-1)*lda+i]; + tempC1 += tempA0*B[k*lda+i-1]+tempA1*B[(k-1)*lda+i-1]; + tempC2 += tempA0*B[k*lda+i-2]+tempA1*B[(k-1)*lda+i-2]; + tempC3 += tempA0*B[k*lda+i-3]+tempA1*B[(k-1)*lda+i-3]; + tempC4 += tempA0*B[k*lda+i-4]+tempA1*B[(k-1)*lda+i-4]; + tempC5 += tempA0*B[k*lda+i-5]+tempA1*B[(k-1)*lda+i-5]; + tempC6 += tempA0*B[k*lda+i-6]+tempA1*B[(k-1)*lda+i-6]; + tempC7 += tempA0*B[k*lda+i-7]+tempA1*B[(k-1)*lda+i-7]; + } + C[i + j*lda] = tempC0; + C[i + j*lda - 1] = tempC1; + C[i + j*lda - 2] = tempC2; + C[i + j*lda - 3] = tempC3; + C[i + j*lda - 4] = tempC4; + C[i + j*lda - 5] = tempC5; + C[i + j*lda - 6] = tempC6; + C[i + j*lda - 7] = tempC7; + } + } + } + } +} diff --git a/mt/bh_matmul/bh_matmul.c b/mt/bh_matmul/bh_matmul.c deleted file mode 100755 index a496bba..0000000 --- a/mt/bh_matmul/bh_matmul.c +++ /dev/null @@ -1,248 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * 
B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - - int m, i, j, k, iB0, iB1; - data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7; - data_t tempA0, tempA1; - - if (coreid == 0){ - for (m = 0; m < 2; m++){ - for (j = 0; j < lda/2; j++){ - for (i = 0; i < lda; i+=8){ - tempC0 = C[i + j*lda]; - tempC1 = C[i + j*lda+1]; - tempC2 = C[i + j*lda+2]; - tempC3 = C[i + j*lda+3]; - tempC4 = C[i + j*lda+4]; - tempC5 = C[i + j*lda+5]; - tempC6 = C[i + j*lda+6]; - tempC7 = C[i + j*lda+7]; - iB0 = m*lda*lda/2+i; - iB1 = iB0+lda; - for (k = m*lda/2; k < (m+1)*lda/2; k+=2){ - tempA0 = A[j*lda+k]; - tempA1 = A[j*lda+k+1]; - tempC0 += tempA0*B[iB0]+tempA1*B[iB1]; - tempC1 += tempA0*B[iB0+1]+tempA1*B[iB1+1]; - tempC2 += tempA0*B[iB0+2]+tempA1*B[iB1+2]; - tempC3 += tempA0*B[iB0+3]+tempA1*B[iB1+3]; - tempC4 += tempA0*B[iB0+4]+tempA1*B[iB1+4]; - tempC5 += tempA0*B[iB0+5]+tempA1*B[iB1+5]; - tempC6 += tempA0*B[iB0+6]+tempA1*B[iB1+6]; - tempC7 += tempA0*B[iB0+7]+tempA1*B[iB1+7]; - iB0 += 2*lda; - iB1 += 2*lda; - - } - C[i + j*lda] = tempC0; - C[i + j*lda + 1] = tempC1; - C[i + j*lda + 2] = tempC2; - C[i + j*lda + 3] = tempC3; - C[i + j*lda + 4] = tempC4; - C[i + j*lda + 5] = tempC5; - C[i + j*lda + 6] = tempC6; - C[i + j*lda + 7] = tempC7; - } - } - } - } else { - for (m = 2; m > 0; m--){ - for (j = lda-1; j >= lda/2; j--){ - for (i = lda-1; i >= 0; i-=8){ - tempC0 = C[i + j*lda]; - tempC1 = C[i + j*lda - 1]; - tempC2 = C[i + j*lda - 2]; - tempC3 = C[i + j*lda - 3]; - tempC4 = C[i + j*lda - 4]; - tempC5 = C[i + j*lda - 5]; - tempC6 = C[i + j*lda - 6]; - tempC7 = C[i + j*lda - 7]; - for (k = m*lda/2-1; k >= (m-1)*lda/2; k-=2){ - tempA0 = A[j*lda+k]; - tempA1 = A[j*lda+k-1]; - tempC0 += 
tempA0*B[k*lda+i]+tempA1*B[(k-1)*lda+i]; - tempC1 += tempA0*B[k*lda+i-1]+tempA1*B[(k-1)*lda+i-1]; - tempC2 += tempA0*B[k*lda+i-2]+tempA1*B[(k-1)*lda+i-2]; - tempC3 += tempA0*B[k*lda+i-3]+tempA1*B[(k-1)*lda+i-3]; - tempC4 += tempA0*B[k*lda+i-4]+tempA1*B[(k-1)*lda+i-4]; - tempC5 += tempA0*B[k*lda+i-5]+tempA1*B[(k-1)*lda+i-5]; - tempC6 += tempA0*B[k*lda+i-6]+tempA1*B[(k-1)*lda+i-6]; - tempC7 += tempA0*B[k*lda+i-7]+tempA1*B[(k-1)*lda+i-7]; - } - C[i + j*lda] = tempC0; - C[i + j*lda - 1] = tempC1; - C[i + j*lda - 2] = tempC2; - C[i + j*lda - 3] = tempC3; - C[i + j*lda - 4] = tempC4; - C[i + j*lda - 5] = tempC5; - C[i + j*lda - 6] = tempC6; - C[i + j*lda - 7] = tempC7; - } - } - } - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bh_matmul/dataset.h b/mt/bh_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- 
a/mt/bh_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 
0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, 
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 
1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 
97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 
104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/bh_matmul/matmul_gendata.pl b/mt/bh_matmul/matmul_gendata.pl deleted file mode 100755 index f21bb46..0000000 --- a/mt/bh_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. 
-# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - 
-#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. ($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." 
\n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/bh_matmul/matmul_mi.c b/mt/bh_matmul/matmul_mi.c deleted file mode 100755 index a496bba..0000000 --- a/mt/bh_matmul/matmul_mi.c +++ /dev/null @@ -1,248 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if 
(coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. 
- - int m, i, j, k, iB0, iB1; - data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7; - data_t tempA0, tempA1; - - if (coreid == 0){ - for (m = 0; m < 2; m++){ - for (j = 0; j < lda/2; j++){ - for (i = 0; i < lda; i+=8){ - tempC0 = C[i + j*lda]; - tempC1 = C[i + j*lda+1]; - tempC2 = C[i + j*lda+2]; - tempC3 = C[i + j*lda+3]; - tempC4 = C[i + j*lda+4]; - tempC5 = C[i + j*lda+5]; - tempC6 = C[i + j*lda+6]; - tempC7 = C[i + j*lda+7]; - iB0 = m*lda*lda/2+i; - iB1 = iB0+lda; - for (k = m*lda/2; k < (m+1)*lda/2; k+=2){ - tempA0 = A[j*lda+k]; - tempA1 = A[j*lda+k+1]; - tempC0 += tempA0*B[iB0]+tempA1*B[iB1]; - tempC1 += tempA0*B[iB0+1]+tempA1*B[iB1+1]; - tempC2 += tempA0*B[iB0+2]+tempA1*B[iB1+2]; - tempC3 += tempA0*B[iB0+3]+tempA1*B[iB1+3]; - tempC4 += tempA0*B[iB0+4]+tempA1*B[iB1+4]; - tempC5 += tempA0*B[iB0+5]+tempA1*B[iB1+5]; - tempC6 += tempA0*B[iB0+6]+tempA1*B[iB1+6]; - tempC7 += tempA0*B[iB0+7]+tempA1*B[iB1+7]; - iB0 += 2*lda; - iB1 += 2*lda; - - } - C[i + j*lda] = tempC0; - C[i + j*lda + 1] = tempC1; - C[i + j*lda + 2] = tempC2; - C[i + j*lda + 3] = tempC3; - C[i + j*lda + 4] = tempC4; - C[i + j*lda + 5] = tempC5; - C[i + j*lda + 6] = tempC6; - C[i + j*lda + 7] = tempC7; - } - } - } - } else { - for (m = 2; m > 0; m--){ - for (j = lda-1; j >= lda/2; j--){ - for (i = lda-1; i >= 0; i-=8){ - tempC0 = C[i + j*lda]; - tempC1 = C[i + j*lda - 1]; - tempC2 = C[i + j*lda - 2]; - tempC3 = C[i + j*lda - 3]; - tempC4 = C[i + j*lda - 4]; - tempC5 = C[i + j*lda - 5]; - tempC6 = C[i + j*lda - 6]; - tempC7 = C[i + j*lda - 7]; - for (k = m*lda/2-1; k >= (m-1)*lda/2; k-=2){ - tempA0 = A[j*lda+k]; - tempA1 = A[j*lda+k-1]; - tempC0 += tempA0*B[k*lda+i]+tempA1*B[(k-1)*lda+i]; - tempC1 += tempA0*B[k*lda+i-1]+tempA1*B[(k-1)*lda+i-1]; - tempC2 += tempA0*B[k*lda+i-2]+tempA1*B[(k-1)*lda+i-2]; - tempC3 += tempA0*B[k*lda+i-3]+tempA1*B[(k-1)*lda+i-3]; - tempC4 += tempA0*B[k*lda+i-4]+tempA1*B[(k-1)*lda+i-4]; - tempC5 += tempA0*B[k*lda+i-5]+tempA1*B[(k-1)*lda+i-5]; - tempC6 += 
tempA0*B[k*lda+i-6]+tempA1*B[(k-1)*lda+i-6]; - tempC7 += tempA0*B[k*lda+i-7]+tempA1*B[(k-1)*lda+i-7]; - } - C[i + j*lda] = tempC0; - C[i + j*lda - 1] = tempC1; - C[i + j*lda - 2] = tempC2; - C[i + j*lda - 3] = tempC3; - C[i + j*lda - 4] = tempC4; - C[i + j*lda - 5] = tempC5; - C[i + j*lda - 6] = tempC6; - C[i + j*lda - 7] = tempC7; - } - } - } - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bh_vvadd/bh_vvadd.c b/mt/bh_vvadd/bh_vvadd.c deleted file mode 100755 index c4d06d3..0000000 --- a/mt/bh_vvadd/bh_vvadd.c +++ /dev/null @@ -1,187 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors 
and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - 
// interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - size_t i; - size_t m; - size_t r; - - m = n >> 1; - r = n - 2 * m; //parity check - - if (coreid == 0) { - // printf("Completed number rounding %ld", m); - } - for (i = 0; i < m; i = i + 1) - { - if (coreid == 0) { - x[i] = x[i] + y[i]; - } else { - x[n-1-i] = x[n-1-i] + y[n-1-i]; - } - } - //strip the last element if odd - if (r == 1) { - x[m] = x[m] + y[m]; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - 
- // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bh_vvadd/dataset.h b/mt/bh_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/bh_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 
14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 
8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 
4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 
9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 
6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 
10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 
6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 
13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 
20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 
17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/bh_vvadd/vvadd_gendata.pl b/mt/bh_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/bh_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. 
-# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - 
-#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/bj_matmul.c b/mt/bj_matmul.c new file mode 100644 index 0000000..df1f880 --- /dev/null +++ b/mt/bj_matmul.c @@ -0,0 +1,97 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + // ***************************** // + // **** ADD YOUR CODE HERE ***** // + // ***************************** // + // + // feel free to make a separate function for MI and MSI versions. 
+ + int m, i, j, k, iB0, iB1; + data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7; + data_t tempA0, tempA1; + + if(coreid > 1) return; + if (coreid == 0){ + for (m = 0; m < 2; m++){ + for (j = 0; j < lda/2; j++){ + for (i = 0; i < lda; i+=8){ + tempC0 = C[i + j*lda]; + tempC1 = C[i + j*lda+1]; + tempC2 = C[i + j*lda+2]; + tempC3 = C[i + j*lda+3]; + tempC4 = C[i + j*lda+4]; + tempC5 = C[i + j*lda+5]; + tempC6 = C[i + j*lda+6]; + tempC7 = C[i + j*lda+7]; + iB0 = m*lda*lda/2+i; + iB1 = iB0+lda; + for (k = m*lda/2; k < (m+1)*lda/2; k+=2){ + tempA0 = A[j*lda+k]; + tempA1 = A[j*lda+k+1]; + tempC0 += tempA0*B[iB0]+tempA1*B[iB1]; + tempC1 += tempA0*B[iB0+1]+tempA1*B[iB1+1]; + tempC2 += tempA0*B[iB0+2]+tempA1*B[iB1+2]; + tempC3 += tempA0*B[iB0+3]+tempA1*B[iB1+3]; + tempC4 += tempA0*B[iB0+4]+tempA1*B[iB1+4]; + tempC5 += tempA0*B[iB0+5]+tempA1*B[iB1+5]; + tempC6 += tempA0*B[iB0+6]+tempA1*B[iB1+6]; + tempC7 += tempA0*B[iB0+7]+tempA1*B[iB1+7]; + iB0 += 2*lda; + iB1 += 2*lda; + + } + C[i + j*lda] = tempC0; + C[i + j*lda + 1] = tempC1; + C[i + j*lda + 2] = tempC2; + C[i + j*lda + 3] = tempC3; + C[i + j*lda + 4] = tempC4; + C[i + j*lda + 5] = tempC5; + C[i + j*lda + 6] = tempC6; + C[i + j*lda + 7] = tempC7; + } + } + } + } + if(coreid == 1 || ncores == 1) { + for (m = 2; m > 0; m--){ + for (j = lda-1; j >= lda/2; j--){ + for (i = lda-1; i >= 0; i-=8){ + tempC0 = C[i + j*lda]; + tempC1 = C[i + j*lda - 1]; + tempC2 = C[i + j*lda - 2]; + tempC3 = C[i + j*lda - 3]; + tempC4 = C[i + j*lda - 4]; + tempC5 = C[i + j*lda - 5]; + tempC6 = C[i + j*lda - 6]; + tempC7 = C[i + j*lda - 7]; + for (k = m*lda/2-1; k >= (m-1)*lda/2; k-=2){ + tempA0 = A[j*lda+k]; + tempA1 = A[j*lda+k-1]; + tempC0 += tempA0*B[k*lda+i]+tempA1*B[(k-1)*lda+i]; + tempC1 += tempA0*B[k*lda+i-1]+tempA1*B[(k-1)*lda+i-1]; + tempC2 += tempA0*B[k*lda+i-2]+tempA1*B[(k-1)*lda+i-2]; + tempC3 += tempA0*B[k*lda+i-3]+tempA1*B[(k-1)*lda+i-3]; + tempC4 += tempA0*B[k*lda+i-4]+tempA1*B[(k-1)*lda+i-4]; + tempC5 += 
tempA0*B[k*lda+i-5]+tempA1*B[(k-1)*lda+i-5]; + tempC6 += tempA0*B[k*lda+i-6]+tempA1*B[(k-1)*lda+i-6]; + tempC7 += tempA0*B[k*lda+i-7]+tempA1*B[(k-1)*lda+i-7]; + } + C[i + j*lda] = tempC0; + C[i + j*lda - 1] = tempC1; + C[i + j*lda - 2] = tempC2; + C[i + j*lda - 3] = tempC3; + C[i + j*lda - 4] = tempC4; + C[i + j*lda - 5] = tempC5; + C[i + j*lda - 6] = tempC6; + C[i + j*lda - 7] = tempC7; + } + } + } + } +} diff --git a/mt/bj_matmul/bj_matmul.c b/mt/bj_matmul/bj_matmul.c deleted file mode 100644 index 1642d10..0000000 --- a/mt/bj_matmul/bj_matmul.c +++ /dev/null @@ -1,248 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ -unsigned long _c = -rdcycle(), _i = -rdinstret(); \ -code; \ -_c += rdcycle(), _i += rdinstret(); \ -if (coreid == 0) \ -printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ -stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ -} while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + 
i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - - int m, i, j, k, iB0, iB1; - data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7; - data_t tempA0, tempA1; - - if (coreid == 0){ - for (m = 0; m < 2; m++){ - for (j = 0; j < lda/2; j++){ - for (i = 0; i < lda; i+=8){ - tempC0 = C[i + j*lda]; - tempC1 = C[i + j*lda+1]; - tempC2 = C[i + j*lda+2]; - tempC3 = C[i + j*lda+3]; - tempC4 = C[i + j*lda+4]; - tempC5 = C[i + j*lda+5]; - tempC6 = C[i + j*lda+6]; - tempC7 = C[i + j*lda+7]; - iB0 = m*lda*lda/2+i; - iB1 = iB0+lda; - for (k = m*lda/2; k < (m+1)*lda/2; k+=2){ - tempA0 = A[j*lda+k]; - tempA1 = A[j*lda+k+1]; - tempC0 += tempA0*B[iB0]+tempA1*B[iB1]; - tempC1 += tempA0*B[iB0+1]+tempA1*B[iB1+1]; - tempC2 += tempA0*B[iB0+2]+tempA1*B[iB1+2]; - tempC3 += tempA0*B[iB0+3]+tempA1*B[iB1+3]; - tempC4 += tempA0*B[iB0+4]+tempA1*B[iB1+4]; - tempC5 += tempA0*B[iB0+5]+tempA1*B[iB1+5]; - tempC6 += tempA0*B[iB0+6]+tempA1*B[iB1+6]; - tempC7 += tempA0*B[iB0+7]+tempA1*B[iB1+7]; - iB0 += 2*lda; - iB1 += 2*lda; - - } - C[i + j*lda] = tempC0; - C[i + j*lda + 1] = tempC1; - C[i + j*lda + 2] = tempC2; - C[i + j*lda + 3] = tempC3; - C[i + j*lda + 4] = tempC4; - C[i + j*lda + 5] = tempC5; - C[i + j*lda + 6] = tempC6; - C[i + j*lda + 7] = tempC7; - } - } - } - } else { - for (m = 2; m > 0; m--){ - for (j = lda-1; j >= lda/2; j--){ - for (i = lda-1; i >= 0; i-=8){ - tempC0 = C[i + j*lda]; - tempC1 = C[i + j*lda - 1]; - tempC2 = C[i + j*lda - 2]; - tempC3 = C[i + j*lda - 3]; - tempC4 = C[i + j*lda - 4]; - tempC5 = C[i + j*lda - 5]; - tempC6 = C[i + j*lda - 6]; - tempC7 = C[i + j*lda - 7]; - for (k = m*lda/2-1; k >= (m-1)*lda/2; k-=2){ - tempA0 = A[j*lda+k]; - tempA1 = A[j*lda+k-1]; - tempC0 += 
tempA0*B[k*lda+i]+tempA1*B[(k-1)*lda+i]; - tempC1 += tempA0*B[k*lda+i-1]+tempA1*B[(k-1)*lda+i-1]; - tempC2 += tempA0*B[k*lda+i-2]+tempA1*B[(k-1)*lda+i-2]; - tempC3 += tempA0*B[k*lda+i-3]+tempA1*B[(k-1)*lda+i-3]; - tempC4 += tempA0*B[k*lda+i-4]+tempA1*B[(k-1)*lda+i-4]; - tempC5 += tempA0*B[k*lda+i-5]+tempA1*B[(k-1)*lda+i-5]; - tempC6 += tempA0*B[k*lda+i-6]+tempA1*B[(k-1)*lda+i-6]; - tempC7 += tempA0*B[k*lda+i-7]+tempA1*B[(k-1)*lda+i-7]; - } - C[i + j*lda] = tempC0; - C[i + j*lda - 1] = tempC1; - C[i + j*lda - 2] = tempC2; - C[i + j*lda - 3] = tempC3; - C[i + j*lda - 4] = tempC4; - C[i + j*lda - 5] = tempC5; - C[i + j*lda - 6] = tempC6; - C[i + j*lda - 7] = tempC7; - } - } - } - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bj_matmul/dataset.h b/mt/bj_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- 
a/mt/bj_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 
0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, 
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 
1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 
97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 
104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/bj_matmul/matmul_gendata.pl b/mt/bj_matmul/matmul_gendata.pl deleted file mode 100755 index f21bb46..0000000 --- a/mt/bj_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. 
-# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - 
-#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. ($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." 
\n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/bj_matmul/matmul_mi.c b/mt/bj_matmul/matmul_mi.c deleted file mode 100644 index 1642d10..0000000 --- a/mt/bj_matmul/matmul_mi.c +++ /dev/null @@ -1,248 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ -unsigned long _c = -rdcycle(), _i = -rdinstret(); \ -code; \ -_c += rdcycle(), _i += rdinstret(); \ -if (coreid == 0) \ -printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ -stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ -} while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) 
- return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. 
- - int m, i, j, k, iB0, iB1; - data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7; - data_t tempA0, tempA1; - - if (coreid == 0){ - for (m = 0; m < 2; m++){ - for (j = 0; j < lda/2; j++){ - for (i = 0; i < lda; i+=8){ - tempC0 = C[i + j*lda]; - tempC1 = C[i + j*lda+1]; - tempC2 = C[i + j*lda+2]; - tempC3 = C[i + j*lda+3]; - tempC4 = C[i + j*lda+4]; - tempC5 = C[i + j*lda+5]; - tempC6 = C[i + j*lda+6]; - tempC7 = C[i + j*lda+7]; - iB0 = m*lda*lda/2+i; - iB1 = iB0+lda; - for (k = m*lda/2; k < (m+1)*lda/2; k+=2){ - tempA0 = A[j*lda+k]; - tempA1 = A[j*lda+k+1]; - tempC0 += tempA0*B[iB0]+tempA1*B[iB1]; - tempC1 += tempA0*B[iB0+1]+tempA1*B[iB1+1]; - tempC2 += tempA0*B[iB0+2]+tempA1*B[iB1+2]; - tempC3 += tempA0*B[iB0+3]+tempA1*B[iB1+3]; - tempC4 += tempA0*B[iB0+4]+tempA1*B[iB1+4]; - tempC5 += tempA0*B[iB0+5]+tempA1*B[iB1+5]; - tempC6 += tempA0*B[iB0+6]+tempA1*B[iB1+6]; - tempC7 += tempA0*B[iB0+7]+tempA1*B[iB1+7]; - iB0 += 2*lda; - iB1 += 2*lda; - - } - C[i + j*lda] = tempC0; - C[i + j*lda + 1] = tempC1; - C[i + j*lda + 2] = tempC2; - C[i + j*lda + 3] = tempC3; - C[i + j*lda + 4] = tempC4; - C[i + j*lda + 5] = tempC5; - C[i + j*lda + 6] = tempC6; - C[i + j*lda + 7] = tempC7; - } - } - } - } else { - for (m = 2; m > 0; m--){ - for (j = lda-1; j >= lda/2; j--){ - for (i = lda-1; i >= 0; i-=8){ - tempC0 = C[i + j*lda]; - tempC1 = C[i + j*lda - 1]; - tempC2 = C[i + j*lda - 2]; - tempC3 = C[i + j*lda - 3]; - tempC4 = C[i + j*lda - 4]; - tempC5 = C[i + j*lda - 5]; - tempC6 = C[i + j*lda - 6]; - tempC7 = C[i + j*lda - 7]; - for (k = m*lda/2-1; k >= (m-1)*lda/2; k-=2){ - tempA0 = A[j*lda+k]; - tempA1 = A[j*lda+k-1]; - tempC0 += tempA0*B[k*lda+i]+tempA1*B[(k-1)*lda+i]; - tempC1 += tempA0*B[k*lda+i-1]+tempA1*B[(k-1)*lda+i-1]; - tempC2 += tempA0*B[k*lda+i-2]+tempA1*B[(k-1)*lda+i-2]; - tempC3 += tempA0*B[k*lda+i-3]+tempA1*B[(k-1)*lda+i-3]; - tempC4 += tempA0*B[k*lda+i-4]+tempA1*B[(k-1)*lda+i-4]; - tempC5 += tempA0*B[k*lda+i-5]+tempA1*B[(k-1)*lda+i-5]; - tempC6 += 
tempA0*B[k*lda+i-6]+tempA1*B[(k-1)*lda+i-6]; - tempC7 += tempA0*B[k*lda+i-7]+tempA1*B[(k-1)*lda+i-7]; - } - C[i + j*lda] = tempC0; - C[i + j*lda - 1] = tempC1; - C[i + j*lda - 2] = tempC2; - C[i + j*lda - 3] = tempC3; - C[i + j*lda - 4] = tempC4; - C[i + j*lda - 5] = tempC5; - C[i + j*lda - 6] = tempC6; - C[i + j*lda - 7] = tempC7; - } - } - } - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bj_vvadd/bj_vvadd.c b/mt/bj_vvadd/bj_vvadd.c deleted file mode 100755 index 4f69c35..0000000 --- a/mt/bj_vvadd/bj_vvadd.c +++ /dev/null @@ -1,169 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors 
and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - 
// interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - size_t i; - for (i = coreid*n/ncores; i < (coreid+1)*n/ncores; i++){ - x[i] = x[i] + y[i]; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bj_vvadd/dataset.h b/mt/bj_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/bj_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - 
-#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 
14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 
15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 
11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 
18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 
11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 
17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 
16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 
8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 
25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 
13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/bj_vvadd/vvadd_gendata.pl b/mt/bj_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/bj_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print 
"{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." 
\n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/bk_matmul.c b/mt/bk_matmul.c new file mode 100755 index 0000000..dae49fc --- /dev/null +++ b/mt/bk_matmul.c @@ -0,0 +1,92 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + int i, j, k, ii, jj, kk; + if(coreid > 1) return; + if (coreid == 0) { +// for ( ii = 0; ii < 32; ii+=IC ) + for ( kk = 0; kk < 32; kk+=16 ) + for ( j = 0; j < 16; j++ ) +// for ( j = 0; j < 16; j++ ) + { + for ( i = 0; i < 32; i+=8 ) +// for ( i = ii; i < ii + IC && i < 32; i+=8 ) + { + data_t temp0 = C[i+j*32]; + data_t temp1 = C[i+j*32+1]; + data_t temp2 = C[i+j*32+2]; + data_t temp3 = C[i+j*32+3]; + data_t temp4 = C[i+j*32+4]; + data_t temp5 = C[i+j*32+5]; + data_t temp6 = C[i+j*32+6]; + data_t temp7 = C[i+j*32+7]; + for ( k = kk; k < kk+16 && k < 32; k++ ) +// for ( k = 0; k < 32; k++ ) + { + data_t tempA = A[j*32+k]; + temp0 += tempA * B[k*32 + i]; + temp1 += tempA * B[k*32 + i+1]; + temp2 += tempA * B[k*32 + i+2]; + temp3 += tempA * B[k*32 + i+3]; + temp4 += tempA * B[k*32 + i+4]; + temp5 += tempA * B[k*32 + i+5]; + temp6 += tempA * B[k*32 + i+6]; + temp7 += tempA * B[k*32 + i+7]; + } + C[i+j*32] = temp0; + C[i+j*32+1] = temp1; + C[i+j*32+2] = temp2; + C[i+j*32+3] = temp3; + C[i+j*32+4] = temp4; + C[i+j*32+5] = temp5; + C[i+j*32+6] = temp6; + C[i+j*32+7] = temp7; + } + } + } + if(coreid == 1 || ncores == 1) { +// for ( ii = 0; ii < 32; ii+=IC ) + for ( kk = 0; kk < 32; kk+=16 ) + for ( j = 16; j < 32; j++ ) +// for ( j = 16; j < 32; j++ ) + { + for ( i = 0; i < 32; i+=8 ) +// for ( i = ii; i < ii + IC && i < 32; i+=8 ) + { + data_t temp0 = C[i+j*32]; + data_t temp1 = C[i+j*32+1]; + data_t temp2 = C[i+j*32+2]; + data_t temp3 = C[i+j*32+3]; + data_t temp4 = C[i+j*32+4]; + 
data_t temp5 = C[i+j*32+5]; + data_t temp6 = C[i+j*32+6]; + data_t temp7 = C[i+j*32+7]; + for ( k = kk; k < kk+16 && k < 32; k++ ) + { + data_t tempA = A[j*32+k]; + temp0 += tempA * B[k*32 + i]; + temp1 += tempA * B[k*32 + i+1]; + temp2 += tempA * B[k*32 + i+2]; + temp3 += tempA * B[k*32 + i+3]; + temp4 += tempA * B[k*32 + i+4]; + temp5 += tempA * B[k*32 + i+5]; + temp6 += tempA * B[k*32 + i+6]; + temp7 += tempA * B[k*32 + i+7]; + } + C[i+j*32] = temp0; + C[i+j*32+1] = temp1; + C[i+j*32+2] = temp2; + C[i+j*32+3] = temp3; + C[i+j*32+4] = temp4; + C[i+j*32+5] = temp5; + C[i+j*32+6] = temp6; + C[i+j*32+7] = temp7; + } + + } + } +} diff --git a/mt/bk_matmul/bk_matmul.c b/mt/bk_matmul/bk_matmul.c deleted file mode 100755 index 9fa22b1..0000000 --- a/mt/bk_matmul/bk_matmul.c +++ /dev/null @@ -1,326 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ -///* - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * 
B[k*lda + i]; - } - } -//*/ - /* - int i, j, k, kk; - if (coreid) { - for ( i = 0; i < 16; i+=8 ) - { - for ( j = 0; j < 32; j++ ) - { - data_t temp0 = 0; - data_t temp1 = 0; - data_t temp2 = 0; - data_t temp3 = 0; - data_t temp4 = 0; - data_t temp5 = 0; - data_t temp6 = 0; - data_t temp7 = 0; - for ( kk = 0; kk < 32; kk+=8 ) - for ( k = kk; k < kk+8; k++ ) -// for ( k = 0; k < 32; k++ ) - { - data_t tempA = A[j*32+k]; - temp0 += tempA * B[k*32 + i]; - temp1 += tempA * B[k*32 + i+1]; - temp2 += tempA * B[k*32 + i+2]; - temp3 += tempA * B[k*32 + i+3]; - temp4 += tempA * B[k*32 + i+4]; - temp5 += tempA * B[k*32 + i+5]; - temp6 += tempA * B[k*32 + i+6]; - temp7 += tempA * B[k*32 + i+7]; - } - C[i+j*32] = temp0; - C[i+j*32+1] = temp1; - C[i+j*32+2] = temp2; - C[i+j*32+3] = temp3; - C[i+j*32+4] = temp4; - C[i+j*32+5] = temp5; - C[i+j*32+6] = temp6; - C[i+j*32+7] = temp7; - } - } - } else { - for ( i = 16; i < 32; i+=8 ) - { - for ( j = 0; j < 32; j++ ) - { - data_t temp0 = 0; - data_t temp1 = 0; - data_t temp2 = 0; - data_t temp3 = 0; - data_t temp4 = 0; - data_t temp5 = 0; - data_t temp6 = 0; - data_t temp7 = 0; - for ( kk = 0; kk < 32; kk+=8 ) - for ( k = kk; k < kk+8; k++ ) - { - data_t tempA = A[j*32+k]; - temp0 += tempA * B[k*32 + i]; - temp1 += tempA * B[k*32 + i+1]; - temp2 += tempA * B[k*32 + i+2]; - temp3 += tempA * B[k*32 + i+3]; - temp4 += tempA * B[k*32 + i+4]; - temp5 += tempA * B[k*32 + i+5]; - temp6 += tempA * B[k*32 + i+6]; - temp7 += tempA * B[k*32 + i+7]; - } - C[i+j*32] = temp0; - C[i+j*32+1] = temp1; - C[i+j*32+2] = temp2; - C[i+j*32+3] = temp3; - C[i+j*32+4] = temp4; - C[i+j*32+5] = temp5; - C[i+j*32+6] = temp6; - C[i+j*32+7] = temp7; - } - - } - } - */ -} - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. 
- int i, j, k, ii, jj, kk; - if (coreid) { -// for ( ii = 0; ii < 32; ii+=IC ) - for ( kk = 0; kk < 32; kk+=16 ) - for ( j = 0; j < 16; j++ ) -// for ( j = 0; j < 16; j++ ) - { - for ( i = 0; i < 32; i+=8 ) -// for ( i = ii; i < ii + IC && i < 32; i+=8 ) - { - data_t temp0 = C[i+j*32]; - data_t temp1 = C[i+j*32+1]; - data_t temp2 = C[i+j*32+2]; - data_t temp3 = C[i+j*32+3]; - data_t temp4 = C[i+j*32+4]; - data_t temp5 = C[i+j*32+5]; - data_t temp6 = C[i+j*32+6]; - data_t temp7 = C[i+j*32+7]; - for ( k = kk; k < kk+16 && k < 32; k++ ) -// for ( k = 0; k < 32; k++ ) - { - data_t tempA = A[j*32+k]; - temp0 += tempA * B[k*32 + i]; - temp1 += tempA * B[k*32 + i+1]; - temp2 += tempA * B[k*32 + i+2]; - temp3 += tempA * B[k*32 + i+3]; - temp4 += tempA * B[k*32 + i+4]; - temp5 += tempA * B[k*32 + i+5]; - temp6 += tempA * B[k*32 + i+6]; - temp7 += tempA * B[k*32 + i+7]; - } - C[i+j*32] = temp0; - C[i+j*32+1] = temp1; - C[i+j*32+2] = temp2; - C[i+j*32+3] = temp3; - C[i+j*32+4] = temp4; - C[i+j*32+5] = temp5; - C[i+j*32+6] = temp6; - C[i+j*32+7] = temp7; - } - } - } else { -// for ( ii = 0; ii < 32; ii+=IC ) - for ( kk = 0; kk < 32; kk+=16 ) - for ( j = 16; j < 32; j++ ) -// for ( j = 16; j < 32; j++ ) - { - for ( i = 0; i < 32; i+=8 ) -// for ( i = ii; i < ii + IC && i < 32; i+=8 ) - { - data_t temp0 = C[i+j*32]; - data_t temp1 = C[i+j*32+1]; - data_t temp2 = C[i+j*32+2]; - data_t temp3 = C[i+j*32+3]; - data_t temp4 = C[i+j*32+4]; - data_t temp5 = C[i+j*32+5]; - data_t temp6 = C[i+j*32+6]; - data_t temp7 = C[i+j*32+7]; - for ( k = kk; k < kk+16 && k < 32; k++ ) - { - data_t tempA = A[j*32+k]; - temp0 += tempA * B[k*32 + i]; - temp1 += tempA * B[k*32 + i+1]; - temp2 += tempA * B[k*32 + i+2]; - temp3 += tempA * B[k*32 + i+3]; - temp4 += tempA * B[k*32 + i+4]; - temp5 += tempA * B[k*32 + i+5]; - temp6 += tempA * B[k*32 + i+6]; - temp7 += tempA * B[k*32 + i+7]; - } - C[i+j*32] = temp0; - C[i+j*32+1] = temp1; - C[i+j*32+2] = temp2; - C[i+j*32+3] = temp3; - C[i+j*32+4] = temp4; - 
C[i+j*32+5] = temp5; - C[i+j*32+6] = temp6; - C[i+j*32+7] = temp7; - } - - } - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bk_matmul/dataset.h b/mt/bk_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/bk_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 
1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 
3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 
0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 
101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 
59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 
59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/bk_matmul/matmul_gendata.pl b/mt/bk_matmul/matmul_gendata.pl deleted file mode 100755 index f21bb46..0000000 --- a/mt/bk_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < 
$arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/bk_matmul/matmul_mi.c b/mt/bk_matmul/matmul_mi.c deleted file mode 100755 index b45071b..0000000 --- a/mt/bk_matmul/matmul_mi.c +++ /dev/null @@ -1,370 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for 
( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ -///* - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } -//*/ - /* - int i, j, k, kk; - if (coreid) { - for ( i = 0; i < 16; i+=8 ) - { - for ( j = 0; j < 32; j++ ) - { - data_t temp0 = 0; - data_t temp1 = 0; - data_t temp2 = 0; - data_t temp3 = 0; - data_t temp4 = 0; - data_t temp5 = 0; - data_t temp6 = 0; - data_t temp7 = 0; - for ( kk = 0; kk < 32; kk+=8 ) - for ( k = kk; k < kk+8; k++ ) -// for ( k = 0; k < 32; k++ ) - { - data_t tempA = A[j*32+k]; - temp0 += tempA * B[k*32 + i]; - temp1 += tempA * B[k*32 + i+1]; - temp2 += tempA * B[k*32 + i+2]; - temp3 += tempA * B[k*32 + i+3]; - temp4 += tempA * B[k*32 + i+4]; - temp5 += tempA * B[k*32 + i+5]; - temp6 += tempA * B[k*32 + i+6]; - temp7 += tempA * B[k*32 + i+7]; - } - C[i+j*32] = temp0; - C[i+j*32+1] = temp1; - C[i+j*32+2] = temp2; - C[i+j*32+3] = temp3; - C[i+j*32+4] = temp4; - C[i+j*32+5] = temp5; - C[i+j*32+6] = temp6; - C[i+j*32+7] = temp7; - } - } - } else { - for ( i = 16; i < 32; i+=8 ) - { - for ( j = 0; j < 32; j++ ) - { - data_t temp0 = 0; - data_t temp1 = 0; - data_t temp2 = 0; - data_t temp3 = 0; - data_t temp4 = 0; - data_t temp5 = 0; - data_t temp6 = 0; - data_t temp7 = 0; - for ( kk = 0; kk < 32; 
kk+=8 ) - for ( k = kk; k < kk+8; k++ ) - { - data_t tempA = A[j*32+k]; - temp0 += tempA * B[k*32 + i]; - temp1 += tempA * B[k*32 + i+1]; - temp2 += tempA * B[k*32 + i+2]; - temp3 += tempA * B[k*32 + i+3]; - temp4 += tempA * B[k*32 + i+4]; - temp5 += tempA * B[k*32 + i+5]; - temp6 += tempA * B[k*32 + i+6]; - temp7 += tempA * B[k*32 + i+7]; - } - C[i+j*32] = temp0; - C[i+j*32+1] = temp1; - C[i+j*32+2] = temp2; - C[i+j*32+3] = temp3; - C[i+j*32+4] = temp4; - C[i+j*32+5] = temp5; - C[i+j*32+6] = temp6; - C[i+j*32+7] = temp7; - } - - } - } - */ -} - - -#define KC 16 -#define IC 16 -#define JC 16 -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - int i, j, k, ii, jj, kk; - if (coreid) { -// for ( ii = 0; ii < 32; ii+=IC ) - for ( jj = 0; jj < 16; jj+=16 ) - for ( kk = 0; kk < 32; kk+=16 ) - for ( j = jj; j < jj+16 && j < 16; j++ ) -// for ( j = 0; j < 16; j++ ) - { - for ( i = 0; i < 32; i+=8 ) -// for ( i = ii; i < ii + IC && i < 32; i+=8 ) - { - data_t temp0 = C[i+j*32]; - data_t temp1 = C[i+j*32+1]; - data_t temp2 = C[i+j*32+2]; - data_t temp3 = C[i+j*32+3]; - data_t temp4 = C[i+j*32+4]; - data_t temp5 = C[i+j*32+5]; - data_t temp6 = C[i+j*32+6]; - data_t temp7 = C[i+j*32+7]; - for ( k = kk; k < kk+16 && k < 32; k++ ) -// for ( k = 0; k < 32; k++ ) - { - data_t tempA = A[j*32+k]; - temp0 += tempA * B[k*32 + i]; - temp1 += tempA * B[k*32 + i+1]; - temp2 += tempA * B[k*32 + i+2]; - temp3 += tempA * B[k*32 + i+3]; - temp4 += tempA * B[k*32 + i+4]; - temp5 += tempA * B[k*32 + i+5]; - temp6 += tempA * B[k*32 + i+6]; - temp7 += tempA * B[k*32 + i+7]; - } - C[i+j*32] = temp0; - C[i+j*32+1] = temp1; - C[i+j*32+2] = temp2; - C[i+j*32+3] = temp3; - C[i+j*32+4] = temp4; - C[i+j*32+5] = temp5; - C[i+j*32+6] = temp6; - C[i+j*32+7] = 
temp7; - } - } - } else { -// for ( ii = 0; ii < 32; ii+=IC ) - for ( jj = 16; jj < 32; jj+= 16 ) { - for ( kk = 16; kk < 32; kk+=16 ) - for ( j = jj; j < jj+16 && j < 32; j++ ) -// for ( j = 16; j < 32; j++ ) - { - for ( i = 0; i < 32; i+=8 ) -// for ( i = ii; i < ii + IC && i < 32; i+=8 ) - { - data_t temp0 = C[i+j*32]; - data_t temp1 = C[i+j*32+1]; - data_t temp2 = C[i+j*32+2]; - data_t temp3 = C[i+j*32+3]; - data_t temp4 = C[i+j*32+4]; - data_t temp5 = C[i+j*32+5]; - data_t temp6 = C[i+j*32+6]; - data_t temp7 = C[i+j*32+7]; - for ( k = kk; k < kk+16 && k < 32; k++ ) - { - data_t tempA = A[j*32+k]; - temp0 += tempA * B[k*32 + i]; - temp1 += tempA * B[k*32 + i+1]; - temp2 += tempA * B[k*32 + i+2]; - temp3 += tempA * B[k*32 + i+3]; - temp4 += tempA * B[k*32 + i+4]; - temp5 += tempA * B[k*32 + i+5]; - temp6 += tempA * B[k*32 + i+6]; - temp7 += tempA * B[k*32 + i+7]; - } - C[i+j*32] = temp0; - C[i+j*32+1] = temp1; - C[i+j*32+2] = temp2; - C[i+j*32+3] = temp3; - C[i+j*32+4] = temp4; - C[i+j*32+5] = temp5; - C[i+j*32+6] = temp6; - C[i+j*32+7] = temp7; - } - - } - for ( kk = 0; kk < 16; kk+=16 ) - for ( j = jj; j < jj+16 && j < 32; j++ ) -// for ( j = 16; j < 32; j++ ) - { - for ( i = 0; i < 32; i+=8 ) -// for ( i = ii; i < ii + IC && i < 32; i+=8 ) - { - data_t temp0 = C[i+j*32]; - data_t temp1 = C[i+j*32+1]; - data_t temp2 = C[i+j*32+2]; - data_t temp3 = C[i+j*32+3]; - data_t temp4 = C[i+j*32+4]; - data_t temp5 = C[i+j*32+5]; - data_t temp6 = C[i+j*32+6]; - data_t temp7 = C[i+j*32+7]; - for ( k = kk; k < kk+16 && k < 32; k++ ) - { - data_t tempA = A[j*32+k]; - temp0 += tempA * B[k*32 + i]; - temp1 += tempA * B[k*32 + i+1]; - temp2 += tempA * B[k*32 + i+2]; - temp3 += tempA * B[k*32 + i+3]; - temp4 += tempA * B[k*32 + i+4]; - temp5 += tempA * B[k*32 + i+5]; - temp6 += tempA * B[k*32 + i+6]; - temp7 += tempA * B[k*32 + i+7]; - } - C[i+j*32] = temp0; - C[i+j*32+1] = temp1; - C[i+j*32+2] = temp2; - C[i+j*32+3] = temp3; - C[i+j*32+4] = temp4; - C[i+j*32+5] = temp5; - 
C[i+j*32+6] = temp6; - C[i+j*32+7] = temp7; - } - - } - } - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bk_matmul/matmul_msi.c b/mt/bk_matmul/matmul_msi.c deleted file mode 100755 index 253ea12..0000000 --- a/mt/bk_matmul/matmul_msi.c +++ /dev/null @@ -1,326 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ -///* - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * 
B[k*lda + i]; - } - } -//*/ - /* - int i, j, k, kk; - if (coreid) { - for ( i = 0; i < 16; i+=8 ) - { - for ( j = 0; j < 32; j++ ) - { - data_t temp0 = 0; - data_t temp1 = 0; - data_t temp2 = 0; - data_t temp3 = 0; - data_t temp4 = 0; - data_t temp5 = 0; - data_t temp6 = 0; - data_t temp7 = 0; - for ( kk = 0; kk < 32; kk+=8 ) - for ( k = kk; k < kk+8; k++ ) -// for ( k = 0; k < 32; k++ ) - { - data_t tempA = A[j*32+k]; - temp0 += tempA * B[k*32 + i]; - temp1 += tempA * B[k*32 + i+1]; - temp2 += tempA * B[k*32 + i+2]; - temp3 += tempA * B[k*32 + i+3]; - temp4 += tempA * B[k*32 + i+4]; - temp5 += tempA * B[k*32 + i+5]; - temp6 += tempA * B[k*32 + i+6]; - temp7 += tempA * B[k*32 + i+7]; - } - C[i+j*32] = temp0; - C[i+j*32+1] = temp1; - C[i+j*32+2] = temp2; - C[i+j*32+3] = temp3; - C[i+j*32+4] = temp4; - C[i+j*32+5] = temp5; - C[i+j*32+6] = temp6; - C[i+j*32+7] = temp7; - } - } - } else { - for ( i = 16; i < 32; i+=8 ) - { - for ( j = 0; j < 32; j++ ) - { - data_t temp0 = 0; - data_t temp1 = 0; - data_t temp2 = 0; - data_t temp3 = 0; - data_t temp4 = 0; - data_t temp5 = 0; - data_t temp6 = 0; - data_t temp7 = 0; - for ( kk = 0; kk < 32; kk+=8 ) - for ( k = kk; k < kk+8; k++ ) - { - data_t tempA = A[j*32+k]; - temp0 += tempA * B[k*32 + i]; - temp1 += tempA * B[k*32 + i+1]; - temp2 += tempA * B[k*32 + i+2]; - temp3 += tempA * B[k*32 + i+3]; - temp4 += tempA * B[k*32 + i+4]; - temp5 += tempA * B[k*32 + i+5]; - temp6 += tempA * B[k*32 + i+6]; - temp7 += tempA * B[k*32 + i+7]; - } - C[i+j*32] = temp0; - C[i+j*32+1] = temp1; - C[i+j*32+2] = temp2; - C[i+j*32+3] = temp3; - C[i+j*32+4] = temp4; - C[i+j*32+5] = temp5; - C[i+j*32+6] = temp6; - C[i+j*32+7] = temp7; - } - - } - } - */ -} - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. 
- int i, j, k, ii, jj, kk; - if (coreid) { -// for ( ii = 0; ii < 32; ii+=IC ) - for ( kk = 0; kk < 32; kk+=16 ) - for ( j = 0; j < 16; j++ ) -// for ( j = 0; j < 16; j++ ) - { - for ( i = 0; i < 32; i+=8 ) -// for ( i = ii; i < ii + IC && i < 32; i+=8 ) - { - data_t temp0 = C[i+j*32]; - data_t temp1 = C[i+j*32+1]; - data_t temp2 = C[i+j*32+2]; - data_t temp3 = C[i+j*32+3]; - data_t temp4 = C[i+j*32+4]; - data_t temp5 = C[i+j*32+5]; - data_t temp6 = C[i+j*32+6]; - data_t temp7 = C[i+j*32+7]; - for ( k = kk; k < kk+16 && k < 32; k++ ) -// for ( k = 0; k < 32; k++ ) - { - data_t tempA = A[j*32+k]; - temp0 += tempA * B[k*32 + i]; - temp1 += tempA * B[k*32 + i+1]; - temp2 += tempA * B[k*32 + i+2]; - temp3 += tempA * B[k*32 + i+3]; - temp4 += tempA * B[k*32 + i+4]; - temp5 += tempA * B[k*32 + i+5]; - temp6 += tempA * B[k*32 + i+6]; - temp7 += tempA * B[k*32 + i+7]; - } - C[i+j*32] = temp0; - C[i+j*32+1] = temp1; - C[i+j*32+2] = temp2; - C[i+j*32+3] = temp3; - C[i+j*32+4] = temp4; - C[i+j*32+5] = temp5; - C[i+j*32+6] = temp6; - C[i+j*32+7] = temp7; - } - } - } else { -// for ( ii = 0; ii < 32; ii+=IC ) - for ( kk = 0; kk < 32; kk+=16 ) - for ( j = 16; j < 32; j++ ) -// for ( j = 16; j < 32; j++ ) - { - for ( i = 0; i < 32; i+=8 ) -// for ( i = ii; i < ii + IC && i < 32; i+=8 ) - { - data_t temp0 = C[i+j*32]; - data_t temp1 = C[i+j*32+1]; - data_t temp2 = C[i+j*32+2]; - data_t temp3 = C[i+j*32+3]; - data_t temp4 = C[i+j*32+4]; - data_t temp5 = C[i+j*32+5]; - data_t temp6 = C[i+j*32+6]; - data_t temp7 = C[i+j*32+7]; - for ( k = kk; k < kk+16 && k < 32; k++ ) - { - data_t tempA = A[j*32+k]; - temp0 += tempA * B[k*32 + i]; - temp1 += tempA * B[k*32 + i+1]; - temp2 += tempA * B[k*32 + i+2]; - temp3 += tempA * B[k*32 + i+3]; - temp4 += tempA * B[k*32 + i+4]; - temp5 += tempA * B[k*32 + i+5]; - temp6 += tempA * B[k*32 + i+6]; - temp7 += tempA * B[k*32 + i+7]; - } - C[i+j*32] = temp0; - C[i+j*32+1] = temp1; - C[i+j*32+2] = temp2; - C[i+j*32+3] = temp3; - C[i+j*32+4] = temp4; - 
C[i+j*32+5] = temp5; - C[i+j*32+6] = temp6; - C[i+j*32+7] = temp7; - } - - } - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - - // Execute the provided, naive matmul - barrier(nc); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - - // clear results from the first trial - size_t i; - if (coreid == 0) - for (i=0; i < ARRAY_SIZE; i++) - results_data[i] = 0; - barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bk_vvadd/bk_vvadd.c b/mt/bk_vvadd/bk_vvadd.c deleted file mode 100755 index 20fe4af..0000000 --- a/mt/bk_vvadd/bk_vvadd.c +++ /dev/null @@ -1,178 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // 
***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - size_t i; - if (coreid) { - for (i = 0; i < n / 2; i++) - x[i] = x[i] + y[i]; - } else { - for (i = n / 2; i < n; i++) - x[i] = x[i] + y[i]; - } -/* - for ( i = (coreid * n) / ncores; i < ((coreid+1)*n)/ncores; i++ ) { - x[i] = x[i] + y[i]; - } -*/ -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bk_vvadd/dataset.h b/mt/bk_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/bk_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t 
input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 
12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 
18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 
8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 
12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 
4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 
16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 
27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 
22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 
6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 
14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/bk_vvadd/vvadd_gendata.pl b/mt/bk_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/bk_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; 
- for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/bm_matmul.c b/mt/bm_matmul.c new file mode 100644 index 0000000..ae225d4 --- /dev/null +++ b/mt/bm_matmul.c @@ -0,0 +1,205 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + // ***************************** // + // **** ADD YOUR CODE HERE ***** // + // ***************************** // + // + // feel free to make a separate function for MI and MSI versions. 
+ int i, j, k; + int space=lda/ncores; + int max= space*coreid+space; + data_t temp=0; + + data_t temp1=0; + data_t temp2=0; + data_t temp3=0; + data_t temp4=0; + + data_t temp_1=0; + + data_t temp1_1=0; + data_t temp2_1=0; + data_t temp3_1=0; + data_t temp4_1=0; + + data_t temp_2=0; + + data_t temp1_2=0; + data_t temp2_2=0; + data_t temp3_2=0; + data_t temp4_2=0; + + data_t temp_3=0; + + data_t temp1_3=0; + data_t temp2_3=0; + data_t temp3_3=0; + data_t temp4_3=0; + + if (coreid!=ncores-1){ + //main loop + for (i=space*coreid;i -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - 
-//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - int i, j, k; - int space=lda/ncores; - int max= space*coreid+space; - data_t temp=0; - - data_t temp1=0; - data_t temp2=0; - data_t temp3=0; - data_t temp4=0; - - data_t temp_1=0; - - data_t temp1_1=0; - data_t temp2_1=0; - data_t temp3_1=0; - data_t temp4_1=0; - - data_t temp_2=0; - - data_t temp1_2=0; - data_t temp2_2=0; - data_t temp3_2=0; - data_t temp4_2=0; - - data_t temp_3=0; - - data_t temp1_3=0; - data_t temp2_3=0; - data_t temp3_3=0; - data_t temp4_3=0; - - if (coreid!=ncores-1){ - //main loop - for (i=space*coreid;i[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - 
-#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. ($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." 
\n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/bm_matmul/matmul_mi.c b/mt/bm_matmul/matmul_mi.c deleted file mode 100644 index 3bdd096..0000000 --- a/mt/bm_matmul/matmul_mi.c +++ /dev/null @@ -1,348 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if 
(coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. 
- int i, j, k; - int space=lda/ncores; - int max= space*coreid+space; - static data_t B1[32*32]; - if (coreid==ncores-1){ - for (i=0; i -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ 
- // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - - size_t i; - size_t space=n/ncores; - size_t max= space*coreid+space; - if (coreid!=ncores-1){ - for (i=space*coreid;i[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." 
\n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/bn_matmul/bn_matmul.c b/mt/bn_matmul/bn_matmul.c deleted file mode 100755 index 9fa22b1..0000000 --- a/mt/bn_matmul/bn_matmul.c +++ /dev/null @@ -1,326 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i 
= 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ -///* - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } -//*/ - /* - int i, j, k, kk; - if (coreid) { - for ( i = 0; i < 16; i+=8 ) - { - for ( j = 0; j < 32; j++ ) - { - data_t temp0 = 0; - data_t temp1 = 0; - data_t temp2 = 0; - data_t temp3 = 0; - data_t temp4 = 0; - data_t temp5 = 0; - data_t temp6 = 0; - data_t temp7 = 0; - for ( kk = 0; kk < 32; kk+=8 ) - for ( k = kk; k < kk+8; k++ ) -// for ( k = 0; k < 32; k++ ) - { - data_t tempA = A[j*32+k]; - temp0 += tempA * B[k*32 + i]; - temp1 += tempA * B[k*32 + i+1]; - temp2 += tempA * B[k*32 + i+2]; - temp3 += tempA * B[k*32 + i+3]; - temp4 += tempA * B[k*32 + i+4]; - temp5 += tempA * B[k*32 + i+5]; - temp6 += tempA * B[k*32 + i+6]; - temp7 += tempA * B[k*32 + i+7]; - } - C[i+j*32] = temp0; - C[i+j*32+1] = temp1; - C[i+j*32+2] = temp2; - C[i+j*32+3] = temp3; - C[i+j*32+4] = temp4; - C[i+j*32+5] = temp5; - C[i+j*32+6] = temp6; - C[i+j*32+7] = temp7; - } - } - } else { - for ( i = 16; i < 32; i+=8 ) - { - for ( j = 0; j < 32; j++ ) - { - data_t temp0 = 0; - data_t temp1 = 0; - data_t temp2 = 0; - data_t temp3 = 0; - data_t temp4 = 0; - data_t temp5 = 0; - data_t temp6 = 0; - data_t temp7 = 0; - for ( kk = 0; kk < 32; 
kk+=8 ) - for ( k = kk; k < kk+8; k++ ) - { - data_t tempA = A[j*32+k]; - temp0 += tempA * B[k*32 + i]; - temp1 += tempA * B[k*32 + i+1]; - temp2 += tempA * B[k*32 + i+2]; - temp3 += tempA * B[k*32 + i+3]; - temp4 += tempA * B[k*32 + i+4]; - temp5 += tempA * B[k*32 + i+5]; - temp6 += tempA * B[k*32 + i+6]; - temp7 += tempA * B[k*32 + i+7]; - } - C[i+j*32] = temp0; - C[i+j*32+1] = temp1; - C[i+j*32+2] = temp2; - C[i+j*32+3] = temp3; - C[i+j*32+4] = temp4; - C[i+j*32+5] = temp5; - C[i+j*32+6] = temp6; - C[i+j*32+7] = temp7; - } - - } - } - */ -} - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - int i, j, k, ii, jj, kk; - if (coreid) { -// for ( ii = 0; ii < 32; ii+=IC ) - for ( kk = 0; kk < 32; kk+=16 ) - for ( j = 0; j < 16; j++ ) -// for ( j = 0; j < 16; j++ ) - { - for ( i = 0; i < 32; i+=8 ) -// for ( i = ii; i < ii + IC && i < 32; i+=8 ) - { - data_t temp0 = C[i+j*32]; - data_t temp1 = C[i+j*32+1]; - data_t temp2 = C[i+j*32+2]; - data_t temp3 = C[i+j*32+3]; - data_t temp4 = C[i+j*32+4]; - data_t temp5 = C[i+j*32+5]; - data_t temp6 = C[i+j*32+6]; - data_t temp7 = C[i+j*32+7]; - for ( k = kk; k < kk+16 && k < 32; k++ ) -// for ( k = 0; k < 32; k++ ) - { - data_t tempA = A[j*32+k]; - temp0 += tempA * B[k*32 + i]; - temp1 += tempA * B[k*32 + i+1]; - temp2 += tempA * B[k*32 + i+2]; - temp3 += tempA * B[k*32 + i+3]; - temp4 += tempA * B[k*32 + i+4]; - temp5 += tempA * B[k*32 + i+5]; - temp6 += tempA * B[k*32 + i+6]; - temp7 += tempA * B[k*32 + i+7]; - } - C[i+j*32] = temp0; - C[i+j*32+1] = temp1; - C[i+j*32+2] = temp2; - C[i+j*32+3] = temp3; - C[i+j*32+4] = temp4; - C[i+j*32+5] = temp5; - C[i+j*32+6] = temp6; - C[i+j*32+7] = temp7; - } - } - } else { -// for ( ii = 0; ii < 32; ii+=IC ) - for ( kk = 0; kk < 32; kk+=16 ) - 
for ( j = 16; j < 32; j++ ) -// for ( j = 16; j < 32; j++ ) - { - for ( i = 0; i < 32; i+=8 ) -// for ( i = ii; i < ii + IC && i < 32; i+=8 ) - { - data_t temp0 = C[i+j*32]; - data_t temp1 = C[i+j*32+1]; - data_t temp2 = C[i+j*32+2]; - data_t temp3 = C[i+j*32+3]; - data_t temp4 = C[i+j*32+4]; - data_t temp5 = C[i+j*32+5]; - data_t temp6 = C[i+j*32+6]; - data_t temp7 = C[i+j*32+7]; - for ( k = kk; k < kk+16 && k < 32; k++ ) - { - data_t tempA = A[j*32+k]; - temp0 += tempA * B[k*32 + i]; - temp1 += tempA * B[k*32 + i+1]; - temp2 += tempA * B[k*32 + i+2]; - temp3 += tempA * B[k*32 + i+3]; - temp4 += tempA * B[k*32 + i+4]; - temp5 += tempA * B[k*32 + i+5]; - temp6 += tempA * B[k*32 + i+6]; - temp7 += tempA * B[k*32 + i+7]; - } - C[i+j*32] = temp0; - C[i+j*32+1] = temp1; - C[i+j*32+2] = temp2; - C[i+j*32+3] = temp3; - C[i+j*32+4] = temp4; - C[i+j*32+5] = temp5; - C[i+j*32+6] = temp6; - C[i+j*32+7] = temp7; - } - - } - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bn_matmul/dataset.h b/mt/bn_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/bn_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 
3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 
0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 
0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 
68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 
61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/bn_matmul/matmul_gendata.pl b/mt/bn_matmul/matmul_gendata.pl deleted file mode 100755 index 
f21bb46..0000000 --- a/mt/bn_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print 
sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/bn_matmul/matmul_mi.c b/mt/bn_matmul/matmul_mi.c deleted file mode 100644 index b45071b..0000000 --- a/mt/bn_matmul/matmul_mi.c +++ /dev/null @@ -1,370 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for 
( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ -///* - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } -//*/ - /* - int i, j, k, kk; - if (coreid) { - for ( i = 0; i < 16; i+=8 ) - { - for ( j = 0; j < 32; j++ ) - { - data_t temp0 = 0; - data_t temp1 = 0; - data_t temp2 = 0; - data_t temp3 = 0; - data_t temp4 = 0; - data_t temp5 = 0; - data_t temp6 = 0; - data_t temp7 = 0; - for ( kk = 0; kk < 32; kk+=8 ) - for ( k = kk; k < kk+8; k++ ) -// for ( k = 0; k < 32; k++ ) - { - data_t tempA = A[j*32+k]; - temp0 += tempA * B[k*32 + i]; - temp1 += tempA * B[k*32 + i+1]; - temp2 += tempA * B[k*32 + i+2]; - temp3 += tempA * B[k*32 + i+3]; - temp4 += tempA * B[k*32 + i+4]; - temp5 += tempA * B[k*32 + i+5]; - temp6 += tempA * B[k*32 + i+6]; - temp7 += tempA * B[k*32 + i+7]; - } - C[i+j*32] = temp0; - C[i+j*32+1] = temp1; - C[i+j*32+2] = temp2; - C[i+j*32+3] = temp3; - C[i+j*32+4] = temp4; - C[i+j*32+5] = temp5; - C[i+j*32+6] = temp6; - C[i+j*32+7] = temp7; - } - } - } else { - for ( i = 16; i < 32; i+=8 ) - { - for ( j = 0; j < 32; j++ ) - { - data_t temp0 = 0; - data_t temp1 = 0; - data_t temp2 = 0; - data_t temp3 = 0; - data_t temp4 = 0; - data_t temp5 = 0; - data_t temp6 = 0; - data_t temp7 = 0; - for ( kk = 0; kk < 32; 
kk+=8 ) - for ( k = kk; k < kk+8; k++ ) - { - data_t tempA = A[j*32+k]; - temp0 += tempA * B[k*32 + i]; - temp1 += tempA * B[k*32 + i+1]; - temp2 += tempA * B[k*32 + i+2]; - temp3 += tempA * B[k*32 + i+3]; - temp4 += tempA * B[k*32 + i+4]; - temp5 += tempA * B[k*32 + i+5]; - temp6 += tempA * B[k*32 + i+6]; - temp7 += tempA * B[k*32 + i+7]; - } - C[i+j*32] = temp0; - C[i+j*32+1] = temp1; - C[i+j*32+2] = temp2; - C[i+j*32+3] = temp3; - C[i+j*32+4] = temp4; - C[i+j*32+5] = temp5; - C[i+j*32+6] = temp6; - C[i+j*32+7] = temp7; - } - - } - } - */ -} - - -#define KC 16 -#define IC 16 -#define JC 16 -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - int i, j, k, ii, jj, kk; - if (coreid) { -// for ( ii = 0; ii < 32; ii+=IC ) - for ( jj = 0; jj < 16; jj+=16 ) - for ( kk = 0; kk < 32; kk+=16 ) - for ( j = jj; j < jj+16 && j < 16; j++ ) -// for ( j = 0; j < 16; j++ ) - { - for ( i = 0; i < 32; i+=8 ) -// for ( i = ii; i < ii + IC && i < 32; i+=8 ) - { - data_t temp0 = C[i+j*32]; - data_t temp1 = C[i+j*32+1]; - data_t temp2 = C[i+j*32+2]; - data_t temp3 = C[i+j*32+3]; - data_t temp4 = C[i+j*32+4]; - data_t temp5 = C[i+j*32+5]; - data_t temp6 = C[i+j*32+6]; - data_t temp7 = C[i+j*32+7]; - for ( k = kk; k < kk+16 && k < 32; k++ ) -// for ( k = 0; k < 32; k++ ) - { - data_t tempA = A[j*32+k]; - temp0 += tempA * B[k*32 + i]; - temp1 += tempA * B[k*32 + i+1]; - temp2 += tempA * B[k*32 + i+2]; - temp3 += tempA * B[k*32 + i+3]; - temp4 += tempA * B[k*32 + i+4]; - temp5 += tempA * B[k*32 + i+5]; - temp6 += tempA * B[k*32 + i+6]; - temp7 += tempA * B[k*32 + i+7]; - } - C[i+j*32] = temp0; - C[i+j*32+1] = temp1; - C[i+j*32+2] = temp2; - C[i+j*32+3] = temp3; - C[i+j*32+4] = temp4; - C[i+j*32+5] = temp5; - C[i+j*32+6] = temp6; - C[i+j*32+7] = 
temp7; - } - } - } else { -// for ( ii = 0; ii < 32; ii+=IC ) - for ( jj = 16; jj < 32; jj+= 16 ) { - for ( kk = 16; kk < 32; kk+=16 ) - for ( j = jj; j < jj+16 && j < 32; j++ ) -// for ( j = 16; j < 32; j++ ) - { - for ( i = 0; i < 32; i+=8 ) -// for ( i = ii; i < ii + IC && i < 32; i+=8 ) - { - data_t temp0 = C[i+j*32]; - data_t temp1 = C[i+j*32+1]; - data_t temp2 = C[i+j*32+2]; - data_t temp3 = C[i+j*32+3]; - data_t temp4 = C[i+j*32+4]; - data_t temp5 = C[i+j*32+5]; - data_t temp6 = C[i+j*32+6]; - data_t temp7 = C[i+j*32+7]; - for ( k = kk; k < kk+16 && k < 32; k++ ) - { - data_t tempA = A[j*32+k]; - temp0 += tempA * B[k*32 + i]; - temp1 += tempA * B[k*32 + i+1]; - temp2 += tempA * B[k*32 + i+2]; - temp3 += tempA * B[k*32 + i+3]; - temp4 += tempA * B[k*32 + i+4]; - temp5 += tempA * B[k*32 + i+5]; - temp6 += tempA * B[k*32 + i+6]; - temp7 += tempA * B[k*32 + i+7]; - } - C[i+j*32] = temp0; - C[i+j*32+1] = temp1; - C[i+j*32+2] = temp2; - C[i+j*32+3] = temp3; - C[i+j*32+4] = temp4; - C[i+j*32+5] = temp5; - C[i+j*32+6] = temp6; - C[i+j*32+7] = temp7; - } - - } - for ( kk = 0; kk < 16; kk+=16 ) - for ( j = jj; j < jj+16 && j < 32; j++ ) -// for ( j = 16; j < 32; j++ ) - { - for ( i = 0; i < 32; i+=8 ) -// for ( i = ii; i < ii + IC && i < 32; i+=8 ) - { - data_t temp0 = C[i+j*32]; - data_t temp1 = C[i+j*32+1]; - data_t temp2 = C[i+j*32+2]; - data_t temp3 = C[i+j*32+3]; - data_t temp4 = C[i+j*32+4]; - data_t temp5 = C[i+j*32+5]; - data_t temp6 = C[i+j*32+6]; - data_t temp7 = C[i+j*32+7]; - for ( k = kk; k < kk+16 && k < 32; k++ ) - { - data_t tempA = A[j*32+k]; - temp0 += tempA * B[k*32 + i]; - temp1 += tempA * B[k*32 + i+1]; - temp2 += tempA * B[k*32 + i+2]; - temp3 += tempA * B[k*32 + i+3]; - temp4 += tempA * B[k*32 + i+4]; - temp5 += tempA * B[k*32 + i+5]; - temp6 += tempA * B[k*32 + i+6]; - temp7 += tempA * B[k*32 + i+7]; - } - C[i+j*32] = temp0; - C[i+j*32+1] = temp1; - C[i+j*32+2] = temp2; - C[i+j*32+3] = temp3; - C[i+j*32+4] = temp4; - C[i+j*32+5] = temp5; - 
C[i+j*32+6] = temp6; - C[i+j*32+7] = temp7; - } - - } - } - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bn_vvadd/bn_vvadd.c b/mt/bn_vvadd/bn_vvadd.c deleted file mode 100755 index 6c1459e..0000000 --- a/mt/bn_vvadd/bn_vvadd.c +++ /dev/null @@ -1,171 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // 
***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - size_t i; - - for (i = coreid*(n/ncores); i<(coreid+1)*n/ncores; i++) - { - x[i] = x[i] + y[i]; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bn_vvadd/dataset.h b/mt/bn_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/bn_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 
5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 
16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 
9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 
15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 
12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 
2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 
6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 
21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 
21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 
14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 
24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/bn_vvadd/vvadd_gendata.pl b/mt/bn_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/bn_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - 
} - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." 
\n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/bo_matmul.c b/mt/bo_matmul.c new file mode 100644 index 0000000..2fb24ab --- /dev/null +++ b/mt/bo_matmul.c @@ -0,0 +1,98 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + int i, j, k; + data_t B_trans[32*32]; + data_t acc_temp0, acc_temp1; + data_t *A_j, *B_i; + data_t *A_j_k, *B_i_k; + int z; + + //for (i = 0; i < 32; i++) { + // for (j = 0; j < 32; j++) { + // B_trans[i*lda+j] = B[i+j*lda]; + // } + //} + + if (coreid == 0) { + for (i = 0; i < 32; i++) { + B_i = B_trans+i*32; + for (z = 0; z < 32; z++) { + *(B_i+z) = B[i+z*32]; + } + for (j = 0; j < 16; j+=2) { + A_j = A+j*lda; + acc_temp0 = 0; + for (k = 0; k < 32; k+=8) { + A_j_k = A_j+k; + B_i_k = B_i+k; + acc_temp0 += *(A_j_k) * *(B_i_k); + acc_temp0 += *(A_j_k + 1) * *(B_i_k + 1); + acc_temp0 += *(A_j_k + 2) * *(B_i_k + 2); + acc_temp0 += *(A_j_k + 3) * *(B_i_k + 3); + acc_temp0 += *(A_j_k + 4) * *(B_i_k + 4); + acc_temp0 += *(A_j_k + 5) * *(B_i_k + 5); + acc_temp0 += *(A_j_k + 6) * *(B_i_k + 6); + acc_temp0 += *(A_j_k + 7) * *(B_i_k + 7); + } + A_j += 32; + + acc_temp1 = 0; + for (k = 0; k < 32; k+=8) { + acc_temp1 += *(A_j+k) * *(B_i+k); + acc_temp1 += *(A_j+k + 1) * *(B_i+k + 1); + acc_temp1 += *(A_j+k + 2) * *(B_i+k + 2); + acc_temp1 += *(A_j+k + 3) * *(B_i+k + 3); + acc_temp1 += *(A_j+k + 4) * *(B_i+k + 4); + acc_temp1 += *(A_j+k + 5) * *(B_i+k + 5); + acc_temp1 += *(A_j+k + 6) * *(B_i+k + 6); + acc_temp1 += *(A_j+k + 7) * *(B_i+k + 7); + } + + C[i + j*lda] = acc_temp0; + C[i + (j+1)*lda] = acc_temp1; + } + } + } + if (coreid == 1 || ncores == 1) { + for (i = 0; i < 32; i++) { + B_i = B_trans+i*32; + for (z = 0; z < 32; z++) { + *(B_i+z) = B[i+z*32]; + } + for (j = 16; j < 32; j+=2) 
{ + A_j = A+j*lda; + acc_temp0 = 0; + for (k = 0; k < 32; k+=8) { + acc_temp0 += *(A_j+k) * *(B_i+k); + acc_temp0 += *(A_j+k + 1) * *(B_i+k + 1); + acc_temp0 += *(A_j+k + 2) * *(B_i+k + 2); + acc_temp0 += *(A_j+k + 3) * *(B_i+k + 3); + acc_temp0 += *(A_j+k + 4) * *(B_i+k + 4); + acc_temp0 += *(A_j+k + 5) * *(B_i+k + 5); + acc_temp0 += *(A_j+k + 6) * *(B_i+k + 6); + acc_temp0 += *(A_j+k + 7) * *(B_i+k + 7); + } + A_j += 32; + + acc_temp1 = 0; + for (k = 0; k < 32; k+=8) { + acc_temp1 += *(A_j+k) * *(B_i+k); + acc_temp1 += *(A_j+k + 1) * *(B_i+k + 1); + acc_temp1 += *(A_j+k + 2) * *(B_i+k + 2); + acc_temp1 += *(A_j+k + 3) * *(B_i+k + 3); + acc_temp1 += *(A_j+k + 4) * *(B_i+k + 4); + acc_temp1 += *(A_j+k + 5) * *(B_i+k + 5); + acc_temp1 += *(A_j+k + 6) * *(B_i+k + 6); + acc_temp1 += *(A_j+k + 7) * *(B_i+k + 7); + } + C[i + j*lda] = acc_temp0; + C[i + (j+1)*lda] = acc_temp1; + } + } + } +} diff --git a/mt/bo_matmul/bo_matmul.c b/mt/bo_matmul/bo_matmul.c deleted file mode 100644 index dd06110..0000000 --- a/mt/bo_matmul/bo_matmul.c +++ /dev/null @@ -1,341 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda 
+ i]; - } - } - -} - -void __attribute__((noinline)) matmul_MI_transpose(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - data_t B_trans[32*32]; - data_t acc_temp0, acc_temp1; - data_t *A_j, *B_i; - data_t *A_j_k, *B_i_k; - int z; - - //for (i = 0; i < 32; i++) { - // for (j = 0; j < 32; j++) { - // B_trans[i*lda+j] = B[i+j*lda]; - // } - //} - - if (coreid == 0) { - for (i = 0; i < 32; i++) { - B_i = B_trans+i*32; - for (z = 0; z < 32; z++) { - *(B_i+z) = B[i+z*32]; - } - for (j = 0; j < 16; j+=2) { - A_j = A+j*lda; - acc_temp0 = 0; - for (k = 0; k < 32; k+=8) { - A_j_k = A_j+k; - B_i_k = B_i+k; - acc_temp0 += *(A_j_k) * *(B_i_k); - acc_temp0 += *(A_j_k + 1) * *(B_i_k + 1); - acc_temp0 += *(A_j_k + 2) * *(B_i_k + 2); - acc_temp0 += *(A_j_k + 3) * *(B_i_k + 3); - acc_temp0 += *(A_j_k + 4) * *(B_i_k + 4); - acc_temp0 += *(A_j_k + 5) * *(B_i_k + 5); - acc_temp0 += *(A_j_k + 6) * *(B_i_k + 6); - acc_temp0 += *(A_j_k + 7) * *(B_i_k + 7); - } - A_j += 32; - - acc_temp1 = 0; - for (k = 0; k < 32; k+=8) { - acc_temp1 += *(A_j+k) * *(B_i+k); - acc_temp1 += *(A_j+k + 1) * *(B_i+k + 1); - acc_temp1 += *(A_j+k + 2) * *(B_i+k + 2); - acc_temp1 += *(A_j+k + 3) * *(B_i+k + 3); - acc_temp1 += *(A_j+k + 4) * *(B_i+k + 4); - acc_temp1 += *(A_j+k + 5) * *(B_i+k + 5); - acc_temp1 += *(A_j+k + 6) * *(B_i+k + 6); - acc_temp1 += *(A_j+k + 7) * *(B_i+k + 7); - } - - C[i + j*lda] = acc_temp0; - C[i + (j+1)*lda] = acc_temp1; - } - } - } else if (coreid == 1) { - for (i = 0; i < 32; i++) { - B_i = B_trans+i*32; - for (z = 0; z < 32; z++) { - *(B_i+z) = B[i+z*32]; - } - for (j = 16; j < 32; j+=2) { - A_j = A+j*lda; - acc_temp0 = 0; - for (k = 0; k < 32; k+=8) { - acc_temp0 += *(A_j+k) * *(B_i+k); - acc_temp0 += *(A_j+k + 1) * *(B_i+k + 1); - acc_temp0 += *(A_j+k + 2) * *(B_i+k + 2); - acc_temp0 += *(A_j+k + 3) * *(B_i+k + 3); - acc_temp0 += *(A_j+k + 4) * *(B_i+k + 4); - acc_temp0 += *(A_j+k + 5) * *(B_i+k + 5); - acc_temp0 += *(A_j+k + 6) * *(B_i+k + 6); - 
acc_temp0 += *(A_j+k + 7) * *(B_i+k + 7); - } - A_j += 32; - - acc_temp1 = 0; - for (k = 0; k < 32; k+=8) { - acc_temp1 += *(A_j+k) * *(B_i+k); - acc_temp1 += *(A_j+k + 1) * *(B_i+k + 1); - acc_temp1 += *(A_j+k + 2) * *(B_i+k + 2); - acc_temp1 += *(A_j+k + 3) * *(B_i+k + 3); - acc_temp1 += *(A_j+k + 4) * *(B_i+k + 4); - acc_temp1 += *(A_j+k + 5) * *(B_i+k + 5); - acc_temp1 += *(A_j+k + 6) * *(B_i+k + 6); - acc_temp1 += *(A_j+k + 7) * *(B_i+k + 7); - } - C[i + j*lda] = acc_temp0; - C[i + (j+1)*lda] = acc_temp1; - } - } - } -} - -void __attribute__((noinline)) matmul_MI(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - data_t acc_temp; - data_t *A_j, *B_i; - int j_start = coreid*16; - int j_end = (coreid*16)+16; - if (coreid == 0) { - for ( i = 0; i < 32; i++ ) { - B_i = B + i; - for ( j = j_start; j < j_end; j++ ) - { - acc_temp = 0; - A_j = A + j*32; - for ( k = 0; k < 32; k++ ) - { - acc_temp += *(A_j + k) * *(B_i + k*32); - } - C[i + j*32] = acc_temp; - } - } - } else if (coreid == 1) { - for ( i = 16; i < 32; i++ ) { - B_i = B + i; - for ( j = j_start; j < j_end; j++ ) - { - acc_temp = 0; - A_j = A + j*32; - for ( k = 0; k < 32; k+=4 ) - { - acc_temp += *(A_j + k) * *(B_i + k*32); - acc_temp += *(A_j + k + 1) * *(B_i + (k+1)*32); - acc_temp += *(A_j + k + 2) * *(B_i + (k+2)*32); - acc_temp += *(A_j + k + 3) * *(B_i + (k+3)*32); - } - C[i + j*32] = acc_temp; - } - } - for ( i = 0; i < 16; i++ ) { - B_i = B + i; - for ( j = j_start; j < j_end; j++ ) - { - acc_temp = 0; - A_j = A + j*32; - for ( k = 0; k < 32; k+=4 ) - { - acc_temp += *(A_j + k) * *(B_i + k*32); - acc_temp += *(A_j + k + 1) * *(B_i + (k+1)*32); - acc_temp += *(A_j + k + 2) * *(B_i + (k+2)*32); - acc_temp += *(A_j + k + 3) * *(B_i + (k+3)*32); - } - C[i + j*32] = acc_temp; - } - } - - } -} - -void __attribute__((noinline)) matmul_MSI(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - data_t acc_temp; - data_t *A_j, *B_i; - int 
j_start = coreid*16; - int j_end = (coreid*16)+16; - for ( i = 0; i < 32; i++ ) { - B_i = B + i; - for ( j = j_start; j < j_end; j++ ) - { - acc_temp = 0; - A_j = A + j*32; - for ( k = 0; k < 32; k++ ) - { - acc_temp += *(A_j + k) * *(B_i + k*32); - } - C[i + j*32] = acc_temp; - } - } -} - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - // ENABLE_SHARING = false is MI - // ENABLE_SHARING = true is MSI - matmul_MI_transpose(lda, A, B, C); - //matmul_MSI(lda, A, B, C); -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// //verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bo_matmul/dataset.h b/mt/bo_matmul/dataset.h deleted file mode 100755 index 
dde3ee4..0000000 --- a/mt/bo_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 
3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 
0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 
2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 
74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 
89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/bo_matmul/matmul_gendata.pl b/mt/bo_matmul/matmul_gendata.pl deleted file mode 100755 index f21bb46..0000000 --- a/mt/bo_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. 
-# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - 
-#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. ($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." 
\n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/bo_matmul/matmul_mi.c b/mt/bo_matmul/matmul_mi.c deleted file mode 100644 index 3ccdb62..0000000 --- a/mt/bo_matmul/matmul_mi.c +++ /dev/null @@ -1,341 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if 
(coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - -void __attribute__((noinline)) matmul_MI_transpose(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - data_t B_trans[32*32]; - data_t acc_temp0, acc_temp1; - data_t *A_j, *B_i; - data_t *A_j_k, *B_i_k; - int z; - - //for (i = 0; i < 32; i++) { - // for (j = 0; j < 32; j++) { - // B_trans[i*lda+j] = B[i+j*lda]; - // } - //} - - if (coreid == 0) { - for (i = 0; i < 32; i++) { - B_i = B_trans+i*32; - for (z = 0; z < 32; z++) { - *(B_i+z) = B[i+z*32]; - } - for (j = 0; j < 16; j+=2) { - A_j = A+j*lda; - acc_temp0 = 0; - for (k = 0; k < 32; k+=8) { - A_j_k = A_j+k; - B_i_k = B_i+k; - acc_temp0 += *(A_j_k) * *(B_i_k); - acc_temp0 += *(A_j_k + 1) * *(B_i_k + 1); - acc_temp0 += *(A_j_k + 2) * *(B_i_k + 2); - acc_temp0 += *(A_j_k + 3) * *(B_i_k + 3); - acc_temp0 += *(A_j_k + 4) * *(B_i_k + 4); - acc_temp0 += *(A_j_k + 5) * *(B_i_k + 5); - acc_temp0 += *(A_j_k + 6) * *(B_i_k + 6); - acc_temp0 += *(A_j_k + 7) * *(B_i_k + 7); - } - A_j += 32; - - acc_temp1 = 0; - for (k = 0; k < 32; k+=8) { - acc_temp1 += *(A_j+k) * *(B_i+k); - 
acc_temp1 += *(A_j+k + 1) * *(B_i+k + 1); - acc_temp1 += *(A_j+k + 2) * *(B_i+k + 2); - acc_temp1 += *(A_j+k + 3) * *(B_i+k + 3); - acc_temp1 += *(A_j+k + 4) * *(B_i+k + 4); - acc_temp1 += *(A_j+k + 5) * *(B_i+k + 5); - acc_temp1 += *(A_j+k + 6) * *(B_i+k + 6); - acc_temp1 += *(A_j+k + 7) * *(B_i+k + 7); - } - - C[i + j*lda] = acc_temp0; - C[i + (j+1)*lda] = acc_temp1; - } - } - } else if (coreid == 1) { - for (i = 0; i < 32; i++) { - B_i = B_trans+i*32; - for (z = 0; z < 32; z++) { - *(B_i+z) = B[i+z*32]; - } - for (j = 16; j < 32; j+=2) { - A_j = A+j*lda; - acc_temp0 = 0; - for (k = 0; k < 32; k+=8) { - acc_temp0 += *(A_j+k) * *(B_i+k); - acc_temp0 += *(A_j+k + 1) * *(B_i+k + 1); - acc_temp0 += *(A_j+k + 2) * *(B_i+k + 2); - acc_temp0 += *(A_j+k + 3) * *(B_i+k + 3); - acc_temp0 += *(A_j+k + 4) * *(B_i+k + 4); - acc_temp0 += *(A_j+k + 5) * *(B_i+k + 5); - acc_temp0 += *(A_j+k + 6) * *(B_i+k + 6); - acc_temp0 += *(A_j+k + 7) * *(B_i+k + 7); - } - A_j += 32; - - acc_temp1 = 0; - for (k = 0; k < 32; k+=8) { - acc_temp1 += *(A_j+k) * *(B_i+k); - acc_temp1 += *(A_j+k + 1) * *(B_i+k + 1); - acc_temp1 += *(A_j+k + 2) * *(B_i+k + 2); - acc_temp1 += *(A_j+k + 3) * *(B_i+k + 3); - acc_temp1 += *(A_j+k + 4) * *(B_i+k + 4); - acc_temp1 += *(A_j+k + 5) * *(B_i+k + 5); - acc_temp1 += *(A_j+k + 6) * *(B_i+k + 6); - acc_temp1 += *(A_j+k + 7) * *(B_i+k + 7); - } - C[i + j*lda] = acc_temp0; - C[i + (j+1)*lda] = acc_temp1; - } - } - } -} - -void __attribute__((noinline)) matmul_MI(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - data_t acc_temp; - data_t *A_j, *B_i; - int j_start = coreid*16; - int j_end = (coreid*16)+16; - if (coreid == 0) { - for ( i = 0; i < 32; i++ ) { - B_i = B + i; - for ( j = j_start; j < j_end; j++ ) - { - acc_temp = 0; - A_j = A + j*32; - for ( k = 0; k < 32; k++ ) - { - acc_temp += *(A_j + k) * *(B_i + k*32); - } - C[i + j*32] = acc_temp; - } - } - } else if (coreid == 1) { - for ( i = 16; i < 32; i++ ) { - B_i = B + i; - 
for ( j = j_start; j < j_end; j++ ) - { - acc_temp = 0; - A_j = A + j*32; - for ( k = 0; k < 32; k+=4 ) - { - acc_temp += *(A_j + k) * *(B_i + k*32); - acc_temp += *(A_j + k + 1) * *(B_i + (k+1)*32); - acc_temp += *(A_j + k + 2) * *(B_i + (k+2)*32); - acc_temp += *(A_j + k + 3) * *(B_i + (k+3)*32); - } - C[i + j*32] = acc_temp; - } - } - for ( i = 0; i < 16; i++ ) { - B_i = B + i; - for ( j = j_start; j < j_end; j++ ) - { - acc_temp = 0; - A_j = A + j*32; - for ( k = 0; k < 32; k+=4 ) - { - acc_temp += *(A_j + k) * *(B_i + k*32); - acc_temp += *(A_j + k + 1) * *(B_i + (k+1)*32); - acc_temp += *(A_j + k + 2) * *(B_i + (k+2)*32); - acc_temp += *(A_j + k + 3) * *(B_i + (k+3)*32); - } - C[i + j*32] = acc_temp; - } - } - - } -} - -void __attribute__((noinline)) matmul_MSI(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - data_t acc_temp; - data_t *A_j, *B_i; - int j_start = coreid*16; - int j_end = (coreid*16)+16; - for ( i = 0; i < 32; i++ ) { - B_i = B + i; - for ( j = j_start; j < j_end; j++ ) - { - acc_temp = 0; - A_j = A + j*32; - for ( k = 0; k < 32; k++ ) - { - acc_temp += *(A_j + k) * *(B_i + k*32); - } - C[i + j*32] = acc_temp; - } - } -} - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - // ENABLE_SHARING = false is MI - // ENABLE_SHARING = true is MSI - matmul_MI_transpose(lda, A, B, C); - //matmul_MSI(lda, A, B, C); -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// //verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); -// - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bo_vvadd/bo_vvadd.c b/mt/bo_vvadd/bo_vvadd.c deleted file mode 100755 index efdee6c..0000000 --- a/mt/bo_vvadd/bo_vvadd.c +++ /dev/null @@ -1,172 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // 
***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - - size_t i; - for (i = 0; i < (n/ncores); i+= 1) - { - size_t ind = (n/ncores)*coreid+i; - x[ind] = x[ind] + y[ind]; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bo_vvadd/dataset.h b/mt/bo_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/bo_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 
2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 
0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 
13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 
4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 
12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 
10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 
6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 
5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 
8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 
9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 
13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/bo_vvadd/vvadd_gendata.pl b/mt/bo_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/bo_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - 
print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/bp_matmul/bp_matmul.c b/mt/bp_matmul/bp_matmul.c deleted file mode 100755 index dd06110..0000000 --- a/mt/bp_matmul/bp_matmul.c +++ /dev/null @@ -1,341 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. 
- - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + 
j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - -void __attribute__((noinline)) matmul_MI_transpose(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - data_t B_trans[32*32]; - data_t acc_temp0, acc_temp1; - data_t *A_j, *B_i; - data_t *A_j_k, *B_i_k; - int z; - - //for (i = 0; i < 32; i++) { - // for (j = 0; j < 32; j++) { - // B_trans[i*lda+j] = B[i+j*lda]; - // } - //} - - if (coreid == 0) { - for (i = 0; i < 32; i++) { - B_i = B_trans+i*32; - for (z = 0; z < 32; z++) { - *(B_i+z) = B[i+z*32]; - } - for (j = 0; j < 16; j+=2) { - A_j = A+j*lda; - acc_temp0 = 0; - for (k = 0; k < 32; k+=8) { - A_j_k = A_j+k; - B_i_k = B_i+k; - acc_temp0 += *(A_j_k) * *(B_i_k); - acc_temp0 += *(A_j_k + 1) * *(B_i_k + 1); - acc_temp0 += *(A_j_k + 2) * *(B_i_k + 2); - acc_temp0 += *(A_j_k + 3) * *(B_i_k + 3); - acc_temp0 += *(A_j_k + 4) * *(B_i_k + 4); - acc_temp0 += *(A_j_k + 5) * *(B_i_k + 5); - acc_temp0 += *(A_j_k + 6) * *(B_i_k + 6); - acc_temp0 += *(A_j_k + 7) * *(B_i_k + 7); - } - A_j += 32; - - acc_temp1 = 0; - for (k = 0; k < 32; k+=8) { - acc_temp1 += *(A_j+k) * *(B_i+k); - acc_temp1 += *(A_j+k + 1) * *(B_i+k + 1); - acc_temp1 += *(A_j+k + 2) * *(B_i+k + 2); - acc_temp1 += *(A_j+k + 3) * *(B_i+k + 3); - acc_temp1 += *(A_j+k + 4) * *(B_i+k + 4); - acc_temp1 += *(A_j+k + 5) * *(B_i+k + 5); - acc_temp1 += *(A_j+k + 6) * *(B_i+k + 6); - acc_temp1 += *(A_j+k + 7) * *(B_i+k + 7); - } - - C[i + j*lda] = acc_temp0; - C[i + (j+1)*lda] = acc_temp1; - } - } - } else if (coreid == 1) { - for (i = 0; i < 32; i++) { - B_i = B_trans+i*32; - for (z = 0; z < 32; z++) { - *(B_i+z) = B[i+z*32]; - } - for (j = 16; j < 32; j+=2) { - A_j = A+j*lda; - acc_temp0 = 0; - for (k = 0; k < 32; k+=8) { - acc_temp0 += *(A_j+k) * *(B_i+k); - acc_temp0 += *(A_j+k + 1) * *(B_i+k + 1); - acc_temp0 += *(A_j+k + 2) * *(B_i+k + 2); - acc_temp0 += *(A_j+k + 3) * *(B_i+k + 3); - acc_temp0 += *(A_j+k + 4) * *(B_i+k + 4); - acc_temp0 += *(A_j+k + 5) * *(B_i+k + 5); - acc_temp0 
+= *(A_j+k + 6) * *(B_i+k + 6); - acc_temp0 += *(A_j+k + 7) * *(B_i+k + 7); - } - A_j += 32; - - acc_temp1 = 0; - for (k = 0; k < 32; k+=8) { - acc_temp1 += *(A_j+k) * *(B_i+k); - acc_temp1 += *(A_j+k + 1) * *(B_i+k + 1); - acc_temp1 += *(A_j+k + 2) * *(B_i+k + 2); - acc_temp1 += *(A_j+k + 3) * *(B_i+k + 3); - acc_temp1 += *(A_j+k + 4) * *(B_i+k + 4); - acc_temp1 += *(A_j+k + 5) * *(B_i+k + 5); - acc_temp1 += *(A_j+k + 6) * *(B_i+k + 6); - acc_temp1 += *(A_j+k + 7) * *(B_i+k + 7); - } - C[i + j*lda] = acc_temp0; - C[i + (j+1)*lda] = acc_temp1; - } - } - } -} - -void __attribute__((noinline)) matmul_MI(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - data_t acc_temp; - data_t *A_j, *B_i; - int j_start = coreid*16; - int j_end = (coreid*16)+16; - if (coreid == 0) { - for ( i = 0; i < 32; i++ ) { - B_i = B + i; - for ( j = j_start; j < j_end; j++ ) - { - acc_temp = 0; - A_j = A + j*32; - for ( k = 0; k < 32; k++ ) - { - acc_temp += *(A_j + k) * *(B_i + k*32); - } - C[i + j*32] = acc_temp; - } - } - } else if (coreid == 1) { - for ( i = 16; i < 32; i++ ) { - B_i = B + i; - for ( j = j_start; j < j_end; j++ ) - { - acc_temp = 0; - A_j = A + j*32; - for ( k = 0; k < 32; k+=4 ) - { - acc_temp += *(A_j + k) * *(B_i + k*32); - acc_temp += *(A_j + k + 1) * *(B_i + (k+1)*32); - acc_temp += *(A_j + k + 2) * *(B_i + (k+2)*32); - acc_temp += *(A_j + k + 3) * *(B_i + (k+3)*32); - } - C[i + j*32] = acc_temp; - } - } - for ( i = 0; i < 16; i++ ) { - B_i = B + i; - for ( j = j_start; j < j_end; j++ ) - { - acc_temp = 0; - A_j = A + j*32; - for ( k = 0; k < 32; k+=4 ) - { - acc_temp += *(A_j + k) * *(B_i + k*32); - acc_temp += *(A_j + k + 1) * *(B_i + (k+1)*32); - acc_temp += *(A_j + k + 2) * *(B_i + (k+2)*32); - acc_temp += *(A_j + k + 3) * *(B_i + (k+3)*32); - } - C[i + j*32] = acc_temp; - } - } - - } -} - -void __attribute__((noinline)) matmul_MSI(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - data_t acc_temp; 
- data_t *A_j, *B_i; - int j_start = coreid*16; - int j_end = (coreid*16)+16; - for ( i = 0; i < 32; i++ ) { - B_i = B + i; - for ( j = j_start; j < j_end; j++ ) - { - acc_temp = 0; - A_j = A + j*32; - for ( k = 0; k < 32; k++ ) - { - acc_temp += *(A_j + k) * *(B_i + k*32); - } - C[i + j*32] = acc_temp; - } - } -} - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - // ENABLE_SHARING = false is MI - // ENABLE_SHARING = true is MSI - matmul_MI_transpose(lda, A, B, C); - //matmul_MSI(lda, A, B, C); -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// //verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bp_matmul/dataset.h b/mt/bp_matmul/dataset.h 
deleted file mode 100755 index dde3ee4..0000000 --- a/mt/bp_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 
1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 
1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 
3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 
75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 
76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/bp_matmul/matmul_gendata.pl b/mt/bp_matmul/matmul_gendata.pl deleted file mode 100755 index f21bb46..0000000 --- a/mt/bp_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. 
-# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - 
-#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. ($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." 
\n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/bp_matmul/matmul_mi.c b/mt/bp_matmul/matmul_mi.c deleted file mode 100755 index dd06110..0000000 --- a/mt/bp_matmul/matmul_mi.c +++ /dev/null @@ -1,341 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if 
(coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - -void __attribute__((noinline)) matmul_MI_transpose(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - data_t B_trans[32*32]; - data_t acc_temp0, acc_temp1; - data_t *A_j, *B_i; - data_t *A_j_k, *B_i_k; - int z; - - //for (i = 0; i < 32; i++) { - // for (j = 0; j < 32; j++) { - // B_trans[i*lda+j] = B[i+j*lda]; - // } - //} - - if (coreid == 0) { - for (i = 0; i < 32; i++) { - B_i = B_trans+i*32; - for (z = 0; z < 32; z++) { - *(B_i+z) = B[i+z*32]; - } - for (j = 0; j < 16; j+=2) { - A_j = A+j*lda; - acc_temp0 = 0; - for (k = 0; k < 32; k+=8) { - A_j_k = A_j+k; - B_i_k = B_i+k; - acc_temp0 += *(A_j_k) * *(B_i_k); - acc_temp0 += *(A_j_k + 1) * *(B_i_k + 1); - acc_temp0 += *(A_j_k + 2) * *(B_i_k + 2); - acc_temp0 += *(A_j_k + 3) * *(B_i_k + 3); - acc_temp0 += *(A_j_k + 4) * *(B_i_k + 4); - acc_temp0 += *(A_j_k + 5) * *(B_i_k + 5); - acc_temp0 += *(A_j_k + 6) * *(B_i_k + 6); - acc_temp0 += *(A_j_k + 7) * *(B_i_k + 7); - } - A_j += 32; - - acc_temp1 = 0; - for (k = 0; k < 32; k+=8) { - acc_temp1 += *(A_j+k) * *(B_i+k); - 
acc_temp1 += *(A_j+k + 1) * *(B_i+k + 1); - acc_temp1 += *(A_j+k + 2) * *(B_i+k + 2); - acc_temp1 += *(A_j+k + 3) * *(B_i+k + 3); - acc_temp1 += *(A_j+k + 4) * *(B_i+k + 4); - acc_temp1 += *(A_j+k + 5) * *(B_i+k + 5); - acc_temp1 += *(A_j+k + 6) * *(B_i+k + 6); - acc_temp1 += *(A_j+k + 7) * *(B_i+k + 7); - } - - C[i + j*lda] = acc_temp0; - C[i + (j+1)*lda] = acc_temp1; - } - } - } else if (coreid == 1) { - for (i = 0; i < 32; i++) { - B_i = B_trans+i*32; - for (z = 0; z < 32; z++) { - *(B_i+z) = B[i+z*32]; - } - for (j = 16; j < 32; j+=2) { - A_j = A+j*lda; - acc_temp0 = 0; - for (k = 0; k < 32; k+=8) { - acc_temp0 += *(A_j+k) * *(B_i+k); - acc_temp0 += *(A_j+k + 1) * *(B_i+k + 1); - acc_temp0 += *(A_j+k + 2) * *(B_i+k + 2); - acc_temp0 += *(A_j+k + 3) * *(B_i+k + 3); - acc_temp0 += *(A_j+k + 4) * *(B_i+k + 4); - acc_temp0 += *(A_j+k + 5) * *(B_i+k + 5); - acc_temp0 += *(A_j+k + 6) * *(B_i+k + 6); - acc_temp0 += *(A_j+k + 7) * *(B_i+k + 7); - } - A_j += 32; - - acc_temp1 = 0; - for (k = 0; k < 32; k+=8) { - acc_temp1 += *(A_j+k) * *(B_i+k); - acc_temp1 += *(A_j+k + 1) * *(B_i+k + 1); - acc_temp1 += *(A_j+k + 2) * *(B_i+k + 2); - acc_temp1 += *(A_j+k + 3) * *(B_i+k + 3); - acc_temp1 += *(A_j+k + 4) * *(B_i+k + 4); - acc_temp1 += *(A_j+k + 5) * *(B_i+k + 5); - acc_temp1 += *(A_j+k + 6) * *(B_i+k + 6); - acc_temp1 += *(A_j+k + 7) * *(B_i+k + 7); - } - C[i + j*lda] = acc_temp0; - C[i + (j+1)*lda] = acc_temp1; - } - } - } -} - -void __attribute__((noinline)) matmul_MI(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - data_t acc_temp; - data_t *A_j, *B_i; - int j_start = coreid*16; - int j_end = (coreid*16)+16; - if (coreid == 0) { - for ( i = 0; i < 32; i++ ) { - B_i = B + i; - for ( j = j_start; j < j_end; j++ ) - { - acc_temp = 0; - A_j = A + j*32; - for ( k = 0; k < 32; k++ ) - { - acc_temp += *(A_j + k) * *(B_i + k*32); - } - C[i + j*32] = acc_temp; - } - } - } else if (coreid == 1) { - for ( i = 16; i < 32; i++ ) { - B_i = B + i; - 
for ( j = j_start; j < j_end; j++ ) - { - acc_temp = 0; - A_j = A + j*32; - for ( k = 0; k < 32; k+=4 ) - { - acc_temp += *(A_j + k) * *(B_i + k*32); - acc_temp += *(A_j + k + 1) * *(B_i + (k+1)*32); - acc_temp += *(A_j + k + 2) * *(B_i + (k+2)*32); - acc_temp += *(A_j + k + 3) * *(B_i + (k+3)*32); - } - C[i + j*32] = acc_temp; - } - } - for ( i = 0; i < 16; i++ ) { - B_i = B + i; - for ( j = j_start; j < j_end; j++ ) - { - acc_temp = 0; - A_j = A + j*32; - for ( k = 0; k < 32; k+=4 ) - { - acc_temp += *(A_j + k) * *(B_i + k*32); - acc_temp += *(A_j + k + 1) * *(B_i + (k+1)*32); - acc_temp += *(A_j + k + 2) * *(B_i + (k+2)*32); - acc_temp += *(A_j + k + 3) * *(B_i + (k+3)*32); - } - C[i + j*32] = acc_temp; - } - } - - } -} - -void __attribute__((noinline)) matmul_MSI(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - data_t acc_temp; - data_t *A_j, *B_i; - int j_start = coreid*16; - int j_end = (coreid*16)+16; - for ( i = 0; i < 32; i++ ) { - B_i = B + i; - for ( j = j_start; j < j_end; j++ ) - { - acc_temp = 0; - A_j = A + j*32; - for ( k = 0; k < 32; k++ ) - { - acc_temp += *(A_j + k) * *(B_i + k*32); - } - C[i + j*32] = acc_temp; - } - } -} - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - // ENABLE_SHARING = false is MI - // ENABLE_SHARING = true is MSI - matmul_MI_transpose(lda, A, B, C); - //matmul_MSI(lda, A, B, C); -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// //verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bp_vvadd/bp_vvadd.c b/mt/bp_vvadd/bp_vvadd.c deleted file mode 100755 index 41d56ab..0000000 --- a/mt/bp_vvadd/bp_vvadd.c +++ /dev/null @@ -1,178 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // 
***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - size_t i; - - if (coreid == 0) { - for (i = 0; i < n/2; i++) - { - x[i] = x[i] + y[i]; - } - } else if (coreid == 1) { - for (i = n/2; i < n; i++) - { - x[i] = x[i] + y[i]; - } - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bp_vvadd/dataset.h b/mt/bp_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/bp_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 
15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 
3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 
13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 
16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 
8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 
2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 
14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 
17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 
7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 
23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 
14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/bp_vvadd/vvadd_gendata.pl b/mt/bp_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/bp_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print 
sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/br_matmul.c b/mt/br_matmul.c new file mode 100755 index 0000000..13263e4 --- /dev/null +++ b/mt/br_matmul.c @@ -0,0 +1,128 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + // ***************************** // + // **** ADD YOUR CODE HERE ***** // + // ***************************** // + // + // feel free to make a separate function for MI and MSI versions. 
+ int j2, i2, k2, j, i, k; + int tmpC00, tmpC01, tmpC02, tmpC03, tmpC04, tmpC05, tmpC06, tmpC07; + int tmpC10, tmpC11, tmpC12, tmpC13, tmpC14, tmpC15, tmpC16, tmpC17; + int jBLOCK = 32; + int iBLOCK = 16; + int kBLOCK = 32; + static __thread int tB[4096]; //__thread + int startInd = coreid*(lda/ncores); + int endInd = (coreid+1)*(lda/ncores); + + //tranpose B (block?) + for (i = 0; i < lda; i += 2) { + for (j = startInd; j < endInd; j += 2) { + tB[j*lda + i] = B[i*lda + j]; + tB[(j + 1)*lda + i] = B[i*lda + j + 1]; + tB[j*lda + i + 1] = B[(i + 1)*lda + j]; + tB[(j + 1)*lda + i + 1] = B[(i + 1)*lda + j + 1]; + } + barrier(ncores); + } + + // compute C[j*n + i] += A[j*n + k] + Btranspose[i*n + k] + for ( j2 = 0; j2 < lda; j2 += jBLOCK ) + for ( i2 = startInd; i2 < endInd; i2 += iBLOCK ) + for ( j = j2; j < j2 + jBLOCK; j += 2 ) + for ( k2 = 0; k2 < lda; k2 += kBLOCK ) + for ( i = i2; i < i2 + iBLOCK; i += 4) { + tmpC00 = C[j*lda + i + 0]; tmpC10 = C[(j + 1)*lda + i + 0]; + tmpC01 = C[j*lda + i + 1]; tmpC11 = C[(j + 1)*lda + i + 1]; + tmpC02 = C[j*lda + i + 2]; tmpC12 = C[(j + 1)*lda + i + 2]; + tmpC03 = C[j*lda + i + 3]; tmpC13 = C[(j + 1)*lda + i + 3]; + //tmpC04 = C[j*lda + i + 4]; tmpC14 = C[(j + 1)*lda + i + 4]; + //tmpC05 = C[j*lda + i + 5]; tmpC15 = C[(j + 1)*lda + i + 5]; + //tmpC06 = C[j*lda + i + 6]; tmpC16 = C[(j + 1)*lda + i + 6]; + //tmpC07 = C[j*lda + i + 7]; tmpC17 = C[(j + 1)*lda + i + 7]; + for ( k = k2; k < k2 + kBLOCK; k += 4) { + tmpC00 += A[j*lda + k] * tB[(i + 0)*lda + k]; + tmpC01 += A[j*lda + k] * tB[(i + 1)*lda + k]; + tmpC02 += A[j*lda + k] * tB[(i + 2)*lda + k]; + tmpC03 += A[j*lda + k] * tB[(i + 3)*lda + k]; + //tmpC04 += A[j*lda + k] * tB[(i + 4)*lda + k]; + //tmpC05 += A[j*lda + k] * tB[(i + 5)*lda + k]; + //tmpC06 += A[j*lda + k] * tB[(i + 6)*lda + k]; + //tmpC07 += A[j*lda + k] * tB[(i + 7)*lda + k]; + tmpC10 += A[(j + 1)*lda + k] * tB[(i + 0)*lda + k]; + tmpC11 += A[(j + 1)*lda + k] * tB[(i + 1)*lda + k]; + tmpC12 += A[(j + 1)*lda + k] 
* tB[(i + 2)*lda + k]; + tmpC13 += A[(j + 1)*lda + k] * tB[(i + 3)*lda + k]; + //tmpC14 += A[(j + 1)*lda + k] * tB[(i + 4)*lda + k]; + //tmpC15 += A[(j + 1)*lda + k] * tB[(i + 5)*lda + k]; + //tmpC16 += A[(j + 1)*lda + k] * tB[(i + 6)*lda + k]; + //tmpC17 += A[(j + 1)*lda + k] * tB[(i + 7)*lda + k]; + + tmpC00 += A[j*lda + k + 1] * tB[(i + 0)*lda + k + 1]; + tmpC01 += A[j*lda + k + 1] * tB[(i + 1)*lda + k + 1]; + tmpC02 += A[j*lda + k + 1] * tB[(i + 2)*lda + k + 1]; + tmpC03 += A[j*lda + k + 1] * tB[(i + 3)*lda + k + 1]; + //tmpC04 += A[j*lda + k + 1] * tB[(i + 4)*lda + k + 1]; + //tmpC05 += A[j*lda + k + 1] * tB[(i + 5)*lda + k + 1]; + //tmpC06 += A[j*lda + k + 1] * tB[(i + 6)*lda + k + 1]; + //tmpC07 += A[j*lda + k + 1] * tB[(i + 7)*lda + k + 1]; + tmpC10 += A[(j + 1)*lda + k + 1] * tB[(i + 0)*lda + k + 1]; + tmpC11 += A[(j + 1)*lda + k + 1] * tB[(i + 1)*lda + k + 1]; + tmpC12 += A[(j + 1)*lda + k + 1] * tB[(i + 2)*lda + k + 1]; + tmpC13 += A[(j + 1)*lda + k + 1] * tB[(i + 3)*lda + k + 1]; + //tmpC14 += A[(j + 1)*lda + k + 1] * tB[(i + 4)*lda + k + 1]; + //tmpC15 += A[(j + 1)*lda + k + 1] * tB[(i + 5)*lda + k + 1]; + //tmpC16 += A[(j + 1)*lda + k + 1] * tB[(i + 6)*lda + k + 1]; + //tmpC17 += A[(j + 1)*lda + k + 1] * tB[(i + 7)*lda + k + 1]; + + tmpC00 += A[j*lda + k + 2] * tB[(i + 0)*lda + k + 2]; + tmpC01 += A[j*lda + k + 2] * tB[(i + 1)*lda + k + 2]; + tmpC02 += A[j*lda + k + 2] * tB[(i + 2)*lda + k + 2]; + tmpC03 += A[j*lda + k + 2] * tB[(i + 3)*lda + k + 2]; + //tmpC04 += A[j*lda + k + 2] * tB[(i + 4)*lda + k + 2]; + //tmpC05 += A[j*lda + k + 2] * tB[(i + 5)*lda + k + 2]; + //tmpC06 += A[j*lda + k + 2] * tB[(i + 6)*lda + k + 2]; + //tmpC07 += A[j*lda + k + 2] * tB[(i + 7)*lda + k + 2]; + tmpC10 += A[(j + 1)*lda + k + 2] * tB[(i + 0)*lda + k + 2]; + tmpC11 += A[(j + 1)*lda + k + 2] * tB[(i + 1)*lda + k + 2]; + tmpC12 += A[(j + 1)*lda + k + 2] * tB[(i + 2)*lda + k + 2]; + tmpC13 += A[(j + 1)*lda + k + 2] * tB[(i + 3)*lda + k + 2]; + //tmpC14 += A[(j + 1)*lda + 
k + 2] * tB[(i + 4)*lda + k + 2]; + //tmpC15 += A[(j + 1)*lda + k + 2] * tB[(i + 5)*lda + k + 2]; + //tmpC16 += A[(j + 1)*lda + k + 2] * tB[(i + 6)*lda + k + 2]; + //tmpC17 += A[(j + 1)*lda + k + 2] * tB[(i + 7)*lda + k + 2]; + + tmpC00 += A[j*lda + k + 3] * tB[(i + 0)*lda + k + 3]; + tmpC01 += A[j*lda + k + 3] * tB[(i + 1)*lda + k + 3]; + tmpC02 += A[j*lda + k + 3] * tB[(i + 2)*lda + k + 3]; + tmpC03 += A[j*lda + k + 3] * tB[(i + 3)*lda + k + 3]; + //tmpC04 += A[j*lda + k + 3] * tB[(i + 4)*lda + k + 3]; + //tmpC05 += A[j*lda + k + 3] * tB[(i + 5)*lda + k + 3]; + //tmpC06 += A[j*lda + k + 3] * tB[(i + 6)*lda + k + 3]; + //tmpC07 += A[j*lda + k + 3] * tB[(i + 7)*lda + k + 3]; + tmpC10 += A[(j + 1)*lda + k + 3] * tB[(i + 0)*lda + k + 3]; + tmpC11 += A[(j + 1)*lda + k + 3] * tB[(i + 1)*lda + k + 3]; + tmpC12 += A[(j + 1)*lda + k + 3] * tB[(i + 2)*lda + k + 3]; + tmpC13 += A[(j + 1)*lda + k + 3] * tB[(i + 3)*lda + k + 3]; + //tmpC14 += A[(j + 1)*lda + k + 3] * tB[(i + 4)*lda + k + 3]; + //tmpC15 += A[(j + 1)*lda + k + 3] * tB[(i + 5)*lda + k + 3]; + //tmpC16 += A[(j + 1)*lda + k + 3] * tB[(i + 6)*lda + k + 3]; + //tmpC17 += A[(j + 1)*lda + k + 3] * tB[(i + 7)*lda + k + 3]; + } + C[j*lda + i + 0] = tmpC00; C[(j + 1)*lda + i + 0] = tmpC10; + C[j*lda + i + 1] = tmpC01; C[(j + 1)*lda + i + 1] = tmpC11; + C[j*lda + i + 2] = tmpC02; C[(j + 1)*lda + i + 2] = tmpC12; + C[j*lda + i + 3] = tmpC03; C[(j + 1)*lda + i + 3] = tmpC13; + //C[j*lda + i + 4] = tmpC04; C[(j + 1)*lda + i + 4] = tmpC14; + //C[j*lda + i + 5] = tmpC05; C[(j + 1)*lda + i + 5] = tmpC15; + //C[j*lda + i + 6] = tmpC06; C[(j + 1)*lda + i + 6] = tmpC16; + //C[j*lda + i + 7] = tmpC07; C[(j + 1)*lda + i + 7] = tmpC17; + barrier(ncores); + } +} diff --git a/mt/br_matmul/br_matmul.c b/mt/br_matmul/br_matmul.c deleted file mode 100755 index f831ac2..0000000 --- a/mt/br_matmul/br_matmul.c +++ /dev/null @@ -1,283 +0,0 @@ -//************************************************************************** -// Multi-threaded 
Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: Benjamin Han -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - 
return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - int j2, i2, k2, j, i, k; - int tmpC00, tmpC01, tmpC02, tmpC03, tmpC04, tmpC05, tmpC06, tmpC07; - int tmpC10, tmpC11, tmpC12, tmpC13, tmpC14, tmpC15, tmpC16, tmpC17; - int jBLOCK = 32; - int iBLOCK = 16; - int kBLOCK = 32; - static __thread int tB[4096]; //__thread - int startInd = 0; - int endInd = lda >> 1; - if (coreid == 1) { - startInd = lda >> 1; - endInd = lda; - } - - //tranpose B (block?) 
- for (i = 0; i < lda; i += 2) { - for (j = startInd; j < endInd; j += 2) { - tB[j*lda + i] = B[i*lda + j]; - tB[(j + 1)*lda + i] = B[i*lda + j + 1]; - tB[j*lda + i + 1] = B[(i + 1)*lda + j]; - tB[(j + 1)*lda + i + 1] = B[(i + 1)*lda + j + 1]; - } - } - barrier(ncores); - - // compute C[j*n + i] += A[j*n + k] + Btranspose[i*n + k] - for ( j2 = 0; j2 < lda; j2 += jBLOCK ) - for ( i2 = startInd; i2 < endInd; i2 += iBLOCK ) - for ( j = j2; j < j2 + jBLOCK; j += 2 ) - for ( k2 = 0; k2 < lda; k2 += kBLOCK ) - for ( i = i2; i < i2 + iBLOCK; i += 4) { - tmpC00 = C[j*lda + i + 0]; tmpC10 = C[(j + 1)*lda + i + 0]; - tmpC01 = C[j*lda + i + 1]; tmpC11 = C[(j + 1)*lda + i + 1]; - tmpC02 = C[j*lda + i + 2]; tmpC12 = C[(j + 1)*lda + i + 2]; - tmpC03 = C[j*lda + i + 3]; tmpC13 = C[(j + 1)*lda + i + 3]; - //tmpC04 = C[j*lda + i + 4]; tmpC14 = C[(j + 1)*lda + i + 4]; - //tmpC05 = C[j*lda + i + 5]; tmpC15 = C[(j + 1)*lda + i + 5]; - //tmpC06 = C[j*lda + i + 6]; tmpC16 = C[(j + 1)*lda + i + 6]; - //tmpC07 = C[j*lda + i + 7]; tmpC17 = C[(j + 1)*lda + i + 7]; - for ( k = k2; k < k2 + kBLOCK; k += 4) { - tmpC00 += A[j*lda + k] * tB[(i + 0)*lda + k]; - tmpC01 += A[j*lda + k] * tB[(i + 1)*lda + k]; - tmpC02 += A[j*lda + k] * tB[(i + 2)*lda + k]; - tmpC03 += A[j*lda + k] * tB[(i + 3)*lda + k]; - //tmpC04 += A[j*lda + k] * tB[(i + 4)*lda + k]; - //tmpC05 += A[j*lda + k] * tB[(i + 5)*lda + k]; - //tmpC06 += A[j*lda + k] * tB[(i + 6)*lda + k]; - //tmpC07 += A[j*lda + k] * tB[(i + 7)*lda + k]; - tmpC10 += A[(j + 1)*lda + k] * tB[(i + 0)*lda + k]; - tmpC11 += A[(j + 1)*lda + k] * tB[(i + 1)*lda + k]; - tmpC12 += A[(j + 1)*lda + k] * tB[(i + 2)*lda + k]; - tmpC13 += A[(j + 1)*lda + k] * tB[(i + 3)*lda + k]; - //tmpC14 += A[(j + 1)*lda + k] * tB[(i + 4)*lda + k]; - //tmpC15 += A[(j + 1)*lda + k] * tB[(i + 5)*lda + k]; - //tmpC16 += A[(j + 1)*lda + k] * tB[(i + 6)*lda + k]; - //tmpC17 += A[(j + 1)*lda + k] * tB[(i + 7)*lda + k]; - - tmpC00 += A[j*lda + k + 1] * tB[(i + 0)*lda + k + 1]; - tmpC01 += 
A[j*lda + k + 1] * tB[(i + 1)*lda + k + 1]; - tmpC02 += A[j*lda + k + 1] * tB[(i + 2)*lda + k + 1]; - tmpC03 += A[j*lda + k + 1] * tB[(i + 3)*lda + k + 1]; - //tmpC04 += A[j*lda + k + 1] * tB[(i + 4)*lda + k + 1]; - //tmpC05 += A[j*lda + k + 1] * tB[(i + 5)*lda + k + 1]; - //tmpC06 += A[j*lda + k + 1] * tB[(i + 6)*lda + k + 1]; - //tmpC07 += A[j*lda + k + 1] * tB[(i + 7)*lda + k + 1]; - tmpC10 += A[(j + 1)*lda + k + 1] * tB[(i + 0)*lda + k + 1]; - tmpC11 += A[(j + 1)*lda + k + 1] * tB[(i + 1)*lda + k + 1]; - tmpC12 += A[(j + 1)*lda + k + 1] * tB[(i + 2)*lda + k + 1]; - tmpC13 += A[(j + 1)*lda + k + 1] * tB[(i + 3)*lda + k + 1]; - //tmpC14 += A[(j + 1)*lda + k + 1] * tB[(i + 4)*lda + k + 1]; - //tmpC15 += A[(j + 1)*lda + k + 1] * tB[(i + 5)*lda + k + 1]; - //tmpC16 += A[(j + 1)*lda + k + 1] * tB[(i + 6)*lda + k + 1]; - //tmpC17 += A[(j + 1)*lda + k + 1] * tB[(i + 7)*lda + k + 1]; - - tmpC00 += A[j*lda + k + 2] * tB[(i + 0)*lda + k + 2]; - tmpC01 += A[j*lda + k + 2] * tB[(i + 1)*lda + k + 2]; - tmpC02 += A[j*lda + k + 2] * tB[(i + 2)*lda + k + 2]; - tmpC03 += A[j*lda + k + 2] * tB[(i + 3)*lda + k + 2]; - //tmpC04 += A[j*lda + k + 2] * tB[(i + 4)*lda + k + 2]; - //tmpC05 += A[j*lda + k + 2] * tB[(i + 5)*lda + k + 2]; - //tmpC06 += A[j*lda + k + 2] * tB[(i + 6)*lda + k + 2]; - //tmpC07 += A[j*lda + k + 2] * tB[(i + 7)*lda + k + 2]; - tmpC10 += A[(j + 1)*lda + k + 2] * tB[(i + 0)*lda + k + 2]; - tmpC11 += A[(j + 1)*lda + k + 2] * tB[(i + 1)*lda + k + 2]; - tmpC12 += A[(j + 1)*lda + k + 2] * tB[(i + 2)*lda + k + 2]; - tmpC13 += A[(j + 1)*lda + k + 2] * tB[(i + 3)*lda + k + 2]; - //tmpC14 += A[(j + 1)*lda + k + 2] * tB[(i + 4)*lda + k + 2]; - //tmpC15 += A[(j + 1)*lda + k + 2] * tB[(i + 5)*lda + k + 2]; - //tmpC16 += A[(j + 1)*lda + k + 2] * tB[(i + 6)*lda + k + 2]; - //tmpC17 += A[(j + 1)*lda + k + 2] * tB[(i + 7)*lda + k + 2]; - - tmpC00 += A[j*lda + k + 3] * tB[(i + 0)*lda + k + 3]; - tmpC01 += A[j*lda + k + 3] * tB[(i + 1)*lda + k + 3]; - tmpC02 += A[j*lda + k + 3] * 
tB[(i + 2)*lda + k + 3]; - tmpC03 += A[j*lda + k + 3] * tB[(i + 3)*lda + k + 3]; - //tmpC04 += A[j*lda + k + 3] * tB[(i + 4)*lda + k + 3]; - //tmpC05 += A[j*lda + k + 3] * tB[(i + 5)*lda + k + 3]; - //tmpC06 += A[j*lda + k + 3] * tB[(i + 6)*lda + k + 3]; - //tmpC07 += A[j*lda + k + 3] * tB[(i + 7)*lda + k + 3]; - tmpC10 += A[(j + 1)*lda + k + 3] * tB[(i + 0)*lda + k + 3]; - tmpC11 += A[(j + 1)*lda + k + 3] * tB[(i + 1)*lda + k + 3]; - tmpC12 += A[(j + 1)*lda + k + 3] * tB[(i + 2)*lda + k + 3]; - tmpC13 += A[(j + 1)*lda + k + 3] * tB[(i + 3)*lda + k + 3]; - //tmpC14 += A[(j + 1)*lda + k + 3] * tB[(i + 4)*lda + k + 3]; - //tmpC15 += A[(j + 1)*lda + k + 3] * tB[(i + 5)*lda + k + 3]; - //tmpC16 += A[(j + 1)*lda + k + 3] * tB[(i + 6)*lda + k + 3]; - //tmpC17 += A[(j + 1)*lda + k + 3] * tB[(i + 7)*lda + k + 3]; - } - C[j*lda + i + 0] = tmpC00; C[(j + 1)*lda + i + 0] = tmpC10; - C[j*lda + i + 1] = tmpC01; C[(j + 1)*lda + i + 1] = tmpC11; - C[j*lda + i + 2] = tmpC02; C[(j + 1)*lda + i + 2] = tmpC12; - C[j*lda + i + 3] = tmpC03; C[(j + 1)*lda + i + 3] = tmpC13; - //C[j*lda + i + 4] = tmpC04; C[(j + 1)*lda + i + 4] = tmpC14; - //C[j*lda + i + 5] = tmpC05; C[(j + 1)*lda + i + 5] = tmpC15; - //C[j*lda + i + 6] = tmpC06; C[(j + 1)*lda + i + 6] = tmpC16; - //C[j*lda + i + 7] = tmpC07; C[(j + 1)*lda + i + 7] = tmpC17; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/br_matmul/dataset.h b/mt/br_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/br_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 
3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 
0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 
0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 
68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 
61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/br_matmul/matmul_gendata.pl b/mt/br_matmul/matmul_gendata.pl deleted file mode 100755 index 
f21bb46..0000000 --- a/mt/br_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print 
sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/br_matmul/matmul_mi.c b/mt/br_matmul/matmul_mi.c deleted file mode 100755 index 14a0705..0000000 --- a/mt/br_matmul/matmul_mi.c +++ /dev/null @@ -1,283 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: Benjamin Han -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", 
name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - int j2, i2, k2, j, i, k; - int tmpC00, tmpC01, tmpC02, tmpC03, tmpC04, tmpC05, tmpC06, tmpC07; - int tmpC10, tmpC11, tmpC12, tmpC13, tmpC14, tmpC15, tmpC16, tmpC17; - int jBLOCK = 32; - int iBLOCK = 16; - int kBLOCK = 32; - static __thread int tB[4096]; //__thread - int startInd = 0; - int endInd = lda >> 1; - if (coreid == 1) { - startInd = lda >> 1; - endInd = lda; - } - - //tranpose B (block?) 
- for (i = 0; i < lda; i += 2) { - for (j = startInd; j < endInd; j += 2) { - tB[j*lda + i] = B[i*lda + j]; - tB[(j + 1)*lda + i] = B[i*lda + j + 1]; - tB[j*lda + i + 1] = B[(i + 1)*lda + j]; - tB[(j + 1)*lda + i + 1] = B[(i + 1)*lda + j + 1]; - } - } - barrier(nc); - - // compute C[j*n + i] += A[j*n + k] + Btranspose[i*n + k] - for ( j2 = 0; j2 < lda; j2 += jBLOCK ) - for ( i2 = startInd; i2 < endInd; i2 += iBLOCK ) - for ( j = j2; j < j2 + jBLOCK; j += 2 ) - for ( k2 = 0; k2 < lda; k2 += kBLOCK ) - for ( i = i2; i < i2 + iBLOCK; i += 4) { - tmpC00 = C[j*lda + i + 0]; tmpC10 = C[(j + 1)*lda + i + 0]; - tmpC01 = C[j*lda + i + 1]; tmpC11 = C[(j + 1)*lda + i + 1]; - tmpC02 = C[j*lda + i + 2]; tmpC12 = C[(j + 1)*lda + i + 2]; - tmpC03 = C[j*lda + i + 3]; tmpC13 = C[(j + 1)*lda + i + 3]; - //tmpC04 = C[j*lda + i + 4]; tmpC14 = C[(j + 1)*lda + i + 4]; - //tmpC05 = C[j*lda + i + 5]; tmpC15 = C[(j + 1)*lda + i + 5]; - //tmpC06 = C[j*lda + i + 6]; tmpC16 = C[(j + 1)*lda + i + 6]; - //tmpC07 = C[j*lda + i + 7]; tmpC17 = C[(j + 1)*lda + i + 7]; - for ( k = k2; k < k2 + kBLOCK; k += 4) { - tmpC00 += A[j*lda + k] * tB[(i + 0)*lda + k]; - tmpC01 += A[j*lda + k] * tB[(i + 1)*lda + k]; - tmpC02 += A[j*lda + k] * tB[(i + 2)*lda + k]; - tmpC03 += A[j*lda + k] * tB[(i + 3)*lda + k]; - //tmpC04 += A[j*lda + k] * tB[(i + 4)*lda + k]; - //tmpC05 += A[j*lda + k] * tB[(i + 5)*lda + k]; - //tmpC06 += A[j*lda + k] * tB[(i + 6)*lda + k]; - //tmpC07 += A[j*lda + k] * tB[(i + 7)*lda + k]; - tmpC10 += A[(j + 1)*lda + k] * tB[(i + 0)*lda + k]; - tmpC11 += A[(j + 1)*lda + k] * tB[(i + 1)*lda + k]; - tmpC12 += A[(j + 1)*lda + k] * tB[(i + 2)*lda + k]; - tmpC13 += A[(j + 1)*lda + k] * tB[(i + 3)*lda + k]; - //tmpC14 += A[(j + 1)*lda + k] * tB[(i + 4)*lda + k]; - //tmpC15 += A[(j + 1)*lda + k] * tB[(i + 5)*lda + k]; - //tmpC16 += A[(j + 1)*lda + k] * tB[(i + 6)*lda + k]; - //tmpC17 += A[(j + 1)*lda + k] * tB[(i + 7)*lda + k]; - - tmpC00 += A[j*lda + k + 1] * tB[(i + 0)*lda + k + 1]; - tmpC01 += 
A[j*lda + k + 1] * tB[(i + 1)*lda + k + 1]; - tmpC02 += A[j*lda + k + 1] * tB[(i + 2)*lda + k + 1]; - tmpC03 += A[j*lda + k + 1] * tB[(i + 3)*lda + k + 1]; - //tmpC04 += A[j*lda + k + 1] * tB[(i + 4)*lda + k + 1]; - //tmpC05 += A[j*lda + k + 1] * tB[(i + 5)*lda + k + 1]; - //tmpC06 += A[j*lda + k + 1] * tB[(i + 6)*lda + k + 1]; - //tmpC07 += A[j*lda + k + 1] * tB[(i + 7)*lda + k + 1]; - tmpC10 += A[(j + 1)*lda + k + 1] * tB[(i + 0)*lda + k + 1]; - tmpC11 += A[(j + 1)*lda + k + 1] * tB[(i + 1)*lda + k + 1]; - tmpC12 += A[(j + 1)*lda + k + 1] * tB[(i + 2)*lda + k + 1]; - tmpC13 += A[(j + 1)*lda + k + 1] * tB[(i + 3)*lda + k + 1]; - //tmpC14 += A[(j + 1)*lda + k + 1] * tB[(i + 4)*lda + k + 1]; - //tmpC15 += A[(j + 1)*lda + k + 1] * tB[(i + 5)*lda + k + 1]; - //tmpC16 += A[(j + 1)*lda + k + 1] * tB[(i + 6)*lda + k + 1]; - //tmpC17 += A[(j + 1)*lda + k + 1] * tB[(i + 7)*lda + k + 1]; - - tmpC00 += A[j*lda + k + 2] * tB[(i + 0)*lda + k + 2]; - tmpC01 += A[j*lda + k + 2] * tB[(i + 1)*lda + k + 2]; - tmpC02 += A[j*lda + k + 2] * tB[(i + 2)*lda + k + 2]; - tmpC03 += A[j*lda + k + 2] * tB[(i + 3)*lda + k + 2]; - //tmpC04 += A[j*lda + k + 2] * tB[(i + 4)*lda + k + 2]; - //tmpC05 += A[j*lda + k + 2] * tB[(i + 5)*lda + k + 2]; - //tmpC06 += A[j*lda + k + 2] * tB[(i + 6)*lda + k + 2]; - //tmpC07 += A[j*lda + k + 2] * tB[(i + 7)*lda + k + 2]; - tmpC10 += A[(j + 1)*lda + k + 2] * tB[(i + 0)*lda + k + 2]; - tmpC11 += A[(j + 1)*lda + k + 2] * tB[(i + 1)*lda + k + 2]; - tmpC12 += A[(j + 1)*lda + k + 2] * tB[(i + 2)*lda + k + 2]; - tmpC13 += A[(j + 1)*lda + k + 2] * tB[(i + 3)*lda + k + 2]; - //tmpC14 += A[(j + 1)*lda + k + 2] * tB[(i + 4)*lda + k + 2]; - //tmpC15 += A[(j + 1)*lda + k + 2] * tB[(i + 5)*lda + k + 2]; - //tmpC16 += A[(j + 1)*lda + k + 2] * tB[(i + 6)*lda + k + 2]; - //tmpC17 += A[(j + 1)*lda + k + 2] * tB[(i + 7)*lda + k + 2]; - - tmpC00 += A[j*lda + k + 3] * tB[(i + 0)*lda + k + 3]; - tmpC01 += A[j*lda + k + 3] * tB[(i + 1)*lda + k + 3]; - tmpC02 += A[j*lda + k + 3] * 
tB[(i + 2)*lda + k + 3]; - tmpC03 += A[j*lda + k + 3] * tB[(i + 3)*lda + k + 3]; - //tmpC04 += A[j*lda + k + 3] * tB[(i + 4)*lda + k + 3]; - //tmpC05 += A[j*lda + k + 3] * tB[(i + 5)*lda + k + 3]; - //tmpC06 += A[j*lda + k + 3] * tB[(i + 6)*lda + k + 3]; - //tmpC07 += A[j*lda + k + 3] * tB[(i + 7)*lda + k + 3]; - tmpC10 += A[(j + 1)*lda + k + 3] * tB[(i + 0)*lda + k + 3]; - tmpC11 += A[(j + 1)*lda + k + 3] * tB[(i + 1)*lda + k + 3]; - tmpC12 += A[(j + 1)*lda + k + 3] * tB[(i + 2)*lda + k + 3]; - tmpC13 += A[(j + 1)*lda + k + 3] * tB[(i + 3)*lda + k + 3]; - //tmpC14 += A[(j + 1)*lda + k + 3] * tB[(i + 4)*lda + k + 3]; - //tmpC15 += A[(j + 1)*lda + k + 3] * tB[(i + 5)*lda + k + 3]; - //tmpC16 += A[(j + 1)*lda + k + 3] * tB[(i + 6)*lda + k + 3]; - //tmpC17 += A[(j + 1)*lda + k + 3] * tB[(i + 7)*lda + k + 3]; - } - C[j*lda + i + 0] = tmpC00; C[(j + 1)*lda + i + 0] = tmpC10; - C[j*lda + i + 1] = tmpC01; C[(j + 1)*lda + i + 1] = tmpC11; - C[j*lda + i + 2] = tmpC02; C[(j + 1)*lda + i + 2] = tmpC12; - C[j*lda + i + 3] = tmpC03; C[(j + 1)*lda + i + 3] = tmpC13; - //C[j*lda + i + 4] = tmpC04; C[(j + 1)*lda + i + 4] = tmpC14; - //C[j*lda + i + 5] = tmpC05; C[(j + 1)*lda + i + 5] = tmpC15; - //C[j*lda + i + 6] = tmpC06; C[(j + 1)*lda + i + 6] = tmpC16; - //C[j*lda + i + 7] = tmpC07; C[(j + 1)*lda + i + 7] = tmpC17; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - -// // Execute the provided, naive matmul -// barrier(nc); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); -// -// -// // verify -// verifyMT(ARRAY_SIZE, results_data, verify_data); -// -// // clear results from the first trial -// size_t i; -// if (coreid == 0) -// for (i=0; i < ARRAY_SIZE; i++) -// results_data[i] = 0; -// barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/br_vvadd/br_vvadd.c b/mt/br_vvadd/br_vvadd.c deleted file mode 100755 index ce0d6bb..0000000 --- a/mt/br_vvadd/br_vvadd.c +++ /dev/null @@ -1,174 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : Benjamin Han -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // 
***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - int startInd = 0; - int endInd = n >> 1; - if (coreid == 1) { - startInd = n >> 1; - endInd = n; - } - for (size_t i = startInd ; i < endInd; i+=1) { - x[i] = x[i] + y[i]; - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/br_vvadd/dataset.h b/mt/br_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/br_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 
15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 
3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 
13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 
16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 
8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 
2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 
14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 
17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 
7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 
23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 
14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/br_vvadd/vvadd_gendata.pl b/mt/br_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/br_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print 
sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." 
\n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/bs_matmul.c b/mt/bs_matmul.c new file mode 100755 index 0000000..f80c7e1 --- /dev/null +++ b/mt/bs_matmul.c @@ -0,0 +1,26 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + int i,j,k,a,b,a1,a2,a3,c; + for (j=coreid; j -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - 
-//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i,j,k,a,b,a1,a2,a3,c; - for (j=coreid; j[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/bs_matmul/matmul_mi.c b/mt/bs_matmul/matmul_mi.c deleted file mode 100644 index ec0d89b..0000000 --- a/mt/bs_matmul/matmul_mi.c +++ /dev/null @@ -1,190 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for 
( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ -int i,j,k,a,b,b1,a1,a2,a3,c,c1,c2,c3,b2,b3; - for (j=coreid*4; j -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - 
return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - size_t i; - for (i = coreid*8; i[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; 
- my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/bt_matmul/bt_matmul.c b/mt/bt_matmul/bt_matmul.c deleted file mode 100755 index 4cf652d..0000000 --- a/mt/bt_matmul/bt_matmul.c +++ /dev/null @@ -1,296 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda 
+ i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - - int i, j, k , jj , kk; - int start_i = coreid*lda/2; - int end_i = start_i + lda/2; - int step_j, step_k; - int start_k, end_k, start_j, end_j; - int j_lda; - int pos_A , pos_B, pos_C; - data_t temp00, temp01,temp02,temp03,temp04,temp05,temp06,temp07; - data_t temp10, temp11,temp12,temp13,temp14,temp15,temp16,temp17; - data_t temp_A0, temp_A1, temp_A2, temp_A3, temp_A4, temp_A5, temp_A6, temp_A7; - - if (coreid == 0) - { - step_k = 1; - start_k= 0; - end_k = lda; - - step_j = 2; - start_j= 0; - end_j = lda; - - }else - { - - step_k = -1; - start_k = lda-1; - end_k = -1; - - step_j = -2; - start_j= lda-2; - end_j = -2; - } - - for( kk = start_k ; kk!= end_k ; kk+=(step_k*16) ) - { - for( jj = start_j ; jj!= end_j ; jj+=(step_j*8) ) - { - for ( i = start_i; i < end_i; i+=8 ) - { - //pos_C = i + jj*lda; - for ( j = jj; j != (jj+(step_j*8)) ; j+=step_j ) - { - - pos_C = i + j*lda; - temp00 = C[(pos_C + 0)]; - temp01 = C[(pos_C + 1)]; - temp02 = C[(pos_C + 2)]; - temp03 = C[(pos_C + 3)]; - temp04 = C[(pos_C + 4)]; - temp05 = C[(pos_C + 5)]; - temp06 = C[(pos_C + 6)]; - temp07 = C[(pos_C + 7)]; - - //pos_C += lda; - pos_C = i + (j+1)*lda; - - temp10 = C[(pos_C + 0)]; - temp11 = C[(pos_C + 1)]; - temp12 = C[(pos_C + 2)]; - temp13 = C[(pos_C + 3)]; - temp14 = C[(pos_C + 4)]; - temp15 = C[(pos_C + 5)]; - temp16 = C[(pos_C + 6)]; - temp17 = C[(pos_C + 7)]; - - pos_B = kk*lda + i; - pos_A = j*lda + kk; - for ( k = kk; k != (kk+(step_k*16)) ; k+=step_k ) - { - temp_A0 = A[ pos_A ] ; - temp_A1 = A[pos_A +lda]; - - temp00 += temp_A0 * B[(pos_B + 0)]; - temp01 += temp_A0 * B[(pos_B + 1)]; - temp02 += temp_A0 * B[(pos_B + 2)]; - temp03 += temp_A0 * B[(pos_B + 
3)]; - temp04 += temp_A0 * B[(pos_B + 4)]; - temp05 += temp_A0 * B[(pos_B + 5)]; - temp06 += temp_A0 * B[(pos_B + 6)]; - temp07 += temp_A0 * B[(pos_B + 7)]; - - temp10 += temp_A1 * B[(pos_B + 0)]; - temp11 += temp_A1 * B[(pos_B + 1)]; - temp12 += temp_A1 * B[(pos_B + 2)]; - temp13 += temp_A1 * B[(pos_B + 3)]; - temp14 += temp_A1 * B[(pos_B + 4)]; - temp15 += temp_A1 * B[(pos_B + 5)]; - temp16 += temp_A1 * B[(pos_B + 6)]; - temp17 += temp_A1 * B[(pos_B + 7)]; - - pos_B += (lda*step_k) ; - pos_A += step_k; - } - //barrier(nc); - - C[(pos_C + 0)] = temp10; - C[(pos_C + 1)] = temp11; - C[(pos_C + 2)] = temp12; - C[(pos_C + 3)] = temp13; - C[(pos_C + 4)] = temp14; - C[(pos_C + 5)] = temp15; - C[(pos_C + 6)] = temp16; - C[(pos_C + 7)] = temp17; - //barrier(nc); - - pos_C = i + j*lda; - //pos_C -= lda; - C[(pos_C + 0)] = temp00; - C[(pos_C + 1)] = temp01; - C[(pos_C + 2)] = temp02; - C[(pos_C + 3)] = temp03; - C[(pos_C + 4)] = temp04; - C[(pos_C + 5)] = temp05; - C[(pos_C + 6)] = temp06; - C[(pos_C + 7)] = temp07; - //barrier(nc); - //pos_C += step_j * lda; - } - //barrier(nc); - } - //barrier(nc); - - } - //barrier(nc); - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - /* - // Execute the provided, naive matmul - barrier(nc); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - - // clear results from the first trial - size_t i; - if (coreid == 0) - for (i=0; i < ARRAY_SIZE; i++) - results_data[i] = 0; - barrier(nc); - - */ - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - - //printf("input1_data"); -exit(0); - -} diff --git a/mt/bt_matmul/dataset.h b/mt/bt_matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/bt_matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 
0, 3, 2, 1, 3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 
1, 0, - 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 
3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 
67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 
83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/bt_matmul/matmul.c~ b/mt/bt_matmul/matmul.c~ deleted file mode 100644 index 
99ac845..0000000 --- a/mt/bt_matmul/matmul.c~ +++ /dev/null @@ -1,260 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArray( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - 
if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - int i, j, k; - int temp0, temp1,temp2,temp3,temp4,temp5,temp6,temp7; - int start = coreid*lda/2; - int end = start + lda/2; - int j_lda; - int temp_i; - int temp_A0, temp_A1, temp_A2, temp_A3 ; - - for ( i = start; i < end; i+=8){ - for ( j = 0; j < lda; j++) - { - j_lda = j*lda; - temp0 = C[(i+0) + j_lda]; - temp1 = C[(i+1) + j_lda]; - temp2 = C[(i+2) + j_lda]; - temp3 = C[(i+3) + j_lda]; - temp4 = C[(i+4) + j_lda]; - temp5 = C[(i+5) + j_lda]; - temp6 = C[(i+6) + j_lda]; - temp7 = C[(i+7) + j_lda]; - - - - for ( k = 0; k < lda; k+=4) - { - temp_i = i; - temp_A0 = A[j_lda + (k+0)] ; - temp_A1 = A[j_lda + (k+1)] ; - temp_A2 = A[j_lda + (k+2)] ; - temp_A3 = A[j_lda + (k+3)] ; - - - temp0 += temp_A0 * B[(k+0)*lda + temp_i]; - temp0 += temp_A1 * B[(k+1)*lda + temp_i]; - temp0 += temp_A2 * B[(k+2)*lda + temp_i]; - temp0 += temp_A3 * B[(k+3)*lda + temp_i]; - temp_i++; - - temp1 += temp_A0 * B[(k+0)*lda + temp_i]; - temp1 += temp_A1 * B[(k+1)*lda + temp_i]; - temp1 += temp_A2 * B[(k+2)*lda + temp_i]; - temp1 += temp_A3 * B[(k+3)*lda + temp_i]; - temp_i++; - - temp2 += 
temp_A0 * B[(k+0)*lda + temp_i]; - temp2 += temp_A1 * B[(k+1)*lda + temp_i]; - temp2 += temp_A2 * B[(k+2)*lda + temp_i]; - temp2 += temp_A3 * B[(k+3)*lda + temp_i]; - temp_i++; - - - temp3 += temp_A0 * B[(k+0)*lda + temp_i]; - temp3 += temp_A1 * B[(k+1)*lda + temp_i]; - temp3 += temp_A2 * B[(k+2)*lda + temp_i]; - temp3 += temp_A3 * B[(k+3)*lda + temp_i]; - temp_i++; - - temp4 += temp_A0 * B[(k+0)*lda + temp_i]; - temp4 += temp_A1 * B[(k+1)*lda + temp_i]; - temp4 += temp_A2 * B[(k+2)*lda + temp_i]; - temp4 += temp_A3 * B[(k+3)*lda + temp_i]; - temp_i++; - - temp5 += temp_A0 * B[(k+0)*lda + temp_i]; - temp5 += temp_A1 * B[(k+1)*lda + temp_i]; - temp5 += temp_A2 * B[(k+2)*lda + temp_i]; - temp5 += temp_A3 * B[(k+3)*lda + temp_i]; - temp_i++; - - temp6 += temp_A0 * B[(k+0)*lda + temp_i]; - temp6 += temp_A1 * B[(k+1)*lda + temp_i]; - temp6 += temp_A2 * B[(k+2)*lda + temp_i]; - temp6 += temp_A3 * B[(k+3)*lda + temp_i]; - temp_i++; - - - temp7 += temp_A0 * B[(k+0)*lda + temp_i]; - temp7 += temp_A1 * B[(k+1)*lda + temp_i]; - temp7 += temp_A2 * B[(k+2)*lda + temp_i]; - temp7 += temp_A3 * B[(k+3)*lda + temp_i]; - temp_i++; - - } - - C[i + j*lda] = temp0; - C[(i+1) + j*lda] = temp1; - C[(i+2) + j*lda] = temp2; - C[(i+3) + j*lda] = temp3; - C[(i+4) + j*lda] = temp4; - C[(i+5) + j*lda] = temp5; - C[(i+6) + j*lda] = temp6; - C[(i+7) + j*lda] = temp7; - - } - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - /* - // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); - - - // verify - verify(ARRAY_SIZE, results_data, verify_data); - - // clear results from the first trial - size_t i; - if (coreid == 0) - for (i=0; i < ARRAY_SIZE; i++) - results_data[i] = 0; - barrier(); - - */ - // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); - -#ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); - - exit(0); -} diff --git a/mt/bt_matmul/matmul_gendata.pl b/mt/bt_matmul/matmul_gendata.pl deleted file mode 100755 index f21bb46..0000000 --- a/mt/bt_matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. 
-# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - 
-#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. ($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." 
\n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/bt_matmul/matmul_mi.c b/mt/bt_matmul/matmul_mi.c deleted file mode 100755 index 181314e..0000000 --- a/mt/bt_matmul/matmul_mi.c +++ /dev/null @@ -1,297 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if 
(coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. 
- - int i, j, k , jj , kk; - int start_i = coreid*lda/2; - int end_i = start_i + lda/2; - int step_j, step_k; - int start_k, end_k, start_j, end_j; - int j_lda; - int pos_A , pos_B, pos_C; - data_t temp00, temp01,temp02,temp03,temp04,temp05,temp06,temp07; - data_t temp10, temp11,temp12,temp13,temp14,temp15,temp16,temp17; - data_t temp_A0, temp_A1, temp_A2, temp_A3, temp_A4, temp_A5, temp_A6, temp_A7; - - - if (coreid == 0) - { - step_k = 1; - start_k= 0; - end_k = lda; - - step_j = 2; - start_j= 0; - end_j = lda; - - }else - { - - step_k = -1; - start_k = lda-1; - end_k = -1; - - step_j = -2; - start_j= lda-2; - end_j = -2; - } - - for( kk = start_k ; kk!= end_k ; kk+=(step_k*16) ) - { - for( jj = start_j ; jj!= end_j ; jj+=(step_j*8) ) - { - for ( i = start_i; i < end_i; i+=8 ) - { - //pos_C = i + jj*lda; - for ( j = jj; j != (jj+(step_j*8)) ; j+=step_j ) - { - - pos_C = i + j*lda; - temp00 = C[(pos_C + 0)]; - temp01 = C[(pos_C + 1)]; - temp02 = C[(pos_C + 2)]; - temp03 = C[(pos_C + 3)]; - temp04 = C[(pos_C + 4)]; - temp05 = C[(pos_C + 5)]; - temp06 = C[(pos_C + 6)]; - temp07 = C[(pos_C + 7)]; - - //pos_C += lda; - pos_C = i + (j+1)*lda; - - temp10 = C[(pos_C + 0)]; - temp11 = C[(pos_C + 1)]; - temp12 = C[(pos_C + 2)]; - temp13 = C[(pos_C + 3)]; - temp14 = C[(pos_C + 4)]; - temp15 = C[(pos_C + 5)]; - temp16 = C[(pos_C + 6)]; - temp17 = C[(pos_C + 7)]; - - pos_B = kk*lda + i; - pos_A = j*lda + kk; - for ( k = kk; k != (kk+(step_k*16)) ; k+=step_k ) - { - temp_A0 = A[ pos_A ] ; - temp_A1 = A[pos_A +lda]; - - temp00 += temp_A0 * B[(pos_B + 0)]; - temp01 += temp_A0 * B[(pos_B + 1)]; - temp02 += temp_A0 * B[(pos_B + 2)]; - temp03 += temp_A0 * B[(pos_B + 3)]; - temp04 += temp_A0 * B[(pos_B + 4)]; - temp05 += temp_A0 * B[(pos_B + 5)]; - temp06 += temp_A0 * B[(pos_B + 6)]; - temp07 += temp_A0 * B[(pos_B + 7)]; - - temp10 += temp_A1 * B[(pos_B + 0)]; - temp11 += temp_A1 * B[(pos_B + 1)]; - temp12 += temp_A1 * B[(pos_B + 2)]; - temp13 += temp_A1 * B[(pos_B + 3)]; - temp14 
+= temp_A1 * B[(pos_B + 4)]; - temp15 += temp_A1 * B[(pos_B + 5)]; - temp16 += temp_A1 * B[(pos_B + 6)]; - temp17 += temp_A1 * B[(pos_B + 7)]; - - pos_B += (lda*step_k) ; - pos_A += step_k; - } - //barrier(nc); - - C[(pos_C + 0)] = temp10; - C[(pos_C + 1)] = temp11; - C[(pos_C + 2)] = temp12; - C[(pos_C + 3)] = temp13; - C[(pos_C + 4)] = temp14; - C[(pos_C + 5)] = temp15; - C[(pos_C + 6)] = temp16; - C[(pos_C + 7)] = temp17; - //barrier(nc); - - pos_C = i + j*lda; - //pos_C -= lda; - C[(pos_C + 0)] = temp00; - C[(pos_C + 1)] = temp01; - C[(pos_C + 2)] = temp02; - C[(pos_C + 3)] = temp03; - C[(pos_C + 4)] = temp04; - C[(pos_C + 5)] = temp05; - C[(pos_C + 6)] = temp06; - C[(pos_C + 7)] = temp07; - //barrier(nc); - //pos_C += step_j * lda; - } - //barrier(nc); - } - //barrier(nc); - - } - //barrier(nc); - } -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - /* - // Execute the provided, naive matmul - barrier(nc); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - - // clear results from the first trial - size_t i; - if (coreid == 0) - for (i=0; i < ARRAY_SIZE; i++) - results_data[i] = 0; - barrier(nc); - - */ - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - - //printf("input1_data"); -exit(0); - -} diff --git a/mt/bt_vvadd/bt_vvadd.c b/mt/bt_vvadd/bt_vvadd.c deleted file mode 100755 index 7af3562..0000000 --- a/mt/bt_vvadd/bt_vvadd.c +++ /dev/null @@ -1,173 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // 
***************************** // - size_t i; - size_t chunk_size = n/ncores; - size_t start = chunk_size * coreid; - size_t end = start + chunk_size; - - for( i = start ; i < end; i++ ) - { - x[i]=x[i]+y[i]; - } - // ***************************** // -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/bt_vvadd/dataset.h b/mt/bt_vvadd/dataset.h deleted file mode 100755 index ce9f936..0000000 --- a/mt/bt_vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 
13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 
13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 
5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 
9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 
13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 
9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 
8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 
12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 
30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 
2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 
21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/bt_vvadd/vvadd_gendata.pl b/mt/bt_vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/bt_vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", 
"; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." 
\n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/ce_matmul.c b/mt/ce_matmul.c new file mode 100755 index 0000000..49ad295 --- /dev/null +++ b/mt/ce_matmul.c @@ -0,0 +1,157 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + data_t a1; + data_t a2; + data_t a3; + data_t a4; + data_t a5; + data_t a6; + data_t a7; + data_t a8; + data_t *b1; + data_t *b2; + data_t *b3; + data_t *b4; + data_t *b5; + data_t *b6; + data_t *b7; + data_t *b8; + data_t c1; + data_t c2; + data_t c3; + data_t c4; + data_t c5; + data_t c6; + data_t c7; + data_t c8; + int i, j, k; + int start, end; + static data_t BB[1024]; + + + //transpose B + for ( k = 0; k < lda; k++) { + for ( i = coreid*(lda/ncores); i < (coreid+1)*(lda/ncores); i++ ) { + BB[i*lda + k] = B[k*lda + i]; + } + barrier(ncores); + } + + for ( int x = 0; x < ncores; x++) { + //split the i values into two chunks so the threads don't interfere on the B loads + //this could be generalized if needed, but I won't bother since it would be tricky + //and we already know the size and numthreads + start = x * (32 / ncores); + end = (x+1) * (32 / ncores); + for ( i = start; i < end; i+=8 ) { + for ( j = coreid*(lda/ncores); j < (coreid+1)*(lda/ncores); j++ ) { + c1=0;c2=0;c3=0;c4=0;c5=0;c6=0;c7=0;c8=0; + b1 = &BB[(i+0)*lda]; + b2 = &BB[(i+1)*lda]; + b3 = &BB[(i+2)*lda]; + b4 = &BB[(i+3)*lda]; + b5 = &BB[(i+4)*lda]; + b6 = &BB[(i+5)*lda]; + b7 = &BB[(i+6)*lda]; + b8 = &BB[(i+7)*lda]; + + for ( k = 0; k < lda; k+=8 ) { + a1 = A[j*lda + k+0]; + a2 = A[j*lda + k+1]; + a3 = A[j*lda + k+2]; + a4 = A[j*lda + k+3]; + a5 = A[j*lda + k+4]; + a6 = A[j*lda + k+5]; + a7 = A[j*lda + k+6]; + a8 = A[j*lda + k+7]; + + c1 += a1 * b1[k+0]; + c1 += a2 * b1[k+1]; + c1 += a3 * b1[k+2]; + 
c1 += a4 * b1[k+3]; + c1 += a5 * b1[k+4]; + c1 += a6 * b1[k+5]; + c1 += a7 * b1[k+6]; + c1 += a8 * b1[k+7]; + + c2 += a1 * b2[k+0]; + c2 += a2 * b2[k+1]; + c2 += a3 * b2[k+2]; + c2 += a4 * b2[k+3]; + c2 += a5 * b2[k+4]; + c2 += a6 * b2[k+5]; + c2 += a7 * b2[k+6]; + c2 += a8 * b2[k+7]; + + c3 += a1 * b3[k+0]; + c3 += a2 * b3[k+1]; + c3 += a3 * b3[k+2]; + c3 += a4 * b3[k+3]; + c3 += a5 * b3[k+4]; + c3 += a6 * b3[k+5]; + c3 += a7 * b3[k+6]; + c3 += a8 * b3[k+7]; + + c4 += a1 * b4[k+0]; + c4 += a2 * b4[k+1]; + c4 += a3 * b4[k+2]; + c4 += a4 * b4[k+3]; + c4 += a5 * b4[k+4]; + c4 += a6 * b4[k+5]; + c4 += a7 * b4[k+6]; + c4 += a8 * b4[k+7]; + + c5 += a1 * b5[k+0]; + c5 += a2 * b5[k+1]; + c5 += a3 * b5[k+2]; + c5 += a4 * b5[k+3]; + c5 += a5 * b5[k+4]; + c5 += a6 * b5[k+5]; + c5 += a7 * b5[k+6]; + c5 += a8 * b5[k+7]; + + c6 += a1 * b6[k+0]; + c6 += a2 * b6[k+1]; + c6 += a3 * b6[k+2]; + c6 += a4 * b6[k+3]; + c6 += a5 * b6[k+4]; + c6 += a6 * b6[k+5]; + c6 += a7 * b6[k+6]; + c6 += a8 * b6[k+7]; + + c7 += a1 * b7[k+0]; + c7 += a2 * b7[k+1]; + c7 += a3 * b7[k+2]; + c7 += a4 * b7[k+3]; + c7 += a5 * b7[k+4]; + c7 += a6 * b7[k+5]; + c7 += a7 * b7[k+6]; + c7 += a8 * b7[k+7]; + + c8 += a1 * b8[k+0]; + c8 += a2 * b8[k+1]; + c8 += a3 * b8[k+2]; + c8 += a4 * b8[k+3]; + c8 += a5 * b8[k+4]; + c8 += a6 * b8[k+5]; + c8 += a7 * b8[k+6]; + c8 += a8 * b8[k+7]; + } + C[i+0 + j*lda] += c1; + C[i+1 + j*lda] += c2; + C[i+2 + j*lda] += c3; + C[i+3 + j*lda] += c4; + C[i+4 + j*lda] += c5; + C[i+5 + j*lda] += c6; + C[i+6 + j*lda] += c7; + C[i+7 + j*lda] += c8; + } + } + } +} diff --git a/mt/cf_matmul.c b/mt/cf_matmul.c new file mode 100644 index 0000000..ada9dec --- /dev/null +++ b/mt/cf_matmul.c @@ -0,0 +1,100 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + if(coreid > 1) return; + int i,j,k,l; + data_t element1, element2, element3, 
element4, element5, element6, element7, element8; + int row, row2; + int column1, column2, column3, column4, column5, column6, column7, column8; + data_t temp[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + data_t temp2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + if (coreid == 0){ + for (i=0; i<32; i+=2){ + row = i*32; + row2 = (i+1)*32; + for (j=0; j<16; j+=4){ + element1 = A[row+j]; + element2 = A[row+j+1]; + element3 = A[row+j+2]; + element4 = A[row+j+3]; + column1 = j*32; + column2 = (j+1)*32; + column3 = (j+2)*32; + column4 = (j+3)*32; + element5 = A[row2+j]; + element6 = A[row2+j+1]; + element7 = A[row2+j+2]; + element8 = A[row2+j+3]; + + for (k=0; k<32; k+=4){ + temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k]; + temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1]; + temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2]; + temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3]; + temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k]; + temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1]; + temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2]; + temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3]; + } + + + } + for (l=0; l<32; l++){ + C[row+l]+=temp[l]; + C[row2+l]+=temp2[l]; + temp[l]=0; + temp2[l]=0; + } + + } + } + if(coreid == 1 || ncores == 1) { + for (i=0; i<32; i+=2){ + row = (31-i)*32; + row2 = (31-i-1)*32; + for (j=16; j<32; j+=4){ + element1 = A[row+j]; + element2 = A[row+j+1]; + element3 = A[row+j+2]; + element4 = A[row+j+3]; + element5 = A[row2+j]; + element6 = A[row2+j+1]; + 
element7 = A[row2+j+2]; + element8 = A[row2+j+3]; + column1 = j*32; + column2 = (j+1)*32; + column3 = (j+2)*32; + column4 = (j+3)*32; + for (k=0; k<32; k+=4){ + temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k]; + temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1]; + temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2]; + temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3]; + temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k]; + temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1]; + temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2]; + temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3]; + } + + + + } + for (l=0; l<32; l++){ + C[row+l]+=temp[l]; + C[row2+l]+=temp2[l]; + temp[l]=0; + temp2[l]=0; + } + } + } + // ***************************** // + // **** ADD YOUR CODE HERE ***** // + // ***************************** // + // + // feel free to make a separate function for MI and MSI versions. 
+ +} diff --git a/mt/cg_matmul.c b/mt/cg_matmul.c new file mode 100755 index 0000000..9db30cd --- /dev/null +++ b/mt/cg_matmul.c @@ -0,0 +1,78 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + int i, j, k; + + for ( i = 0; i < lda; i+=2 ) + { + for (k = 0; k < lda; k+=4) + { + int d0 = B[k*lda + i]; + int c0 = B[k*lda + i + 1]; + int d1 = B[(k+1)*lda + i]; + int c1 = B[(k+1)*lda + i + 1]; + int d2 = B[(k+2)*lda + i]; + int c2 = B[(k+2)*lda + i + 1]; + int d3 = B[(k+3)*lda + i]; + int c3 = B[(k+3)*lda + i + 1]; + + for ( j = coreid*(lda/ncores); j < (coreid+1)*(lda/ncores); j+=4) + { + + int sum = A[j*lda + k] * d0; + sum += A[j*lda + k + 1] * d1; + sum += A[j*lda + k + 2] * d2; + sum += A[j*lda + k + 3] * d3; + C[j*lda +i] += sum; + + sum = A[j*lda + k] * c0; + sum += A[j*lda + k + 1] * c1; + sum += A[j*lda + k + 2] * c2; + sum += A[j*lda + k + 3] * c3; + C[j*lda + i + 1] += sum; + + sum = A[(j+1)*lda + k] * d0; + sum += A[(j+1)*lda + k + 1] * d1; + sum += A[(j+1)*lda + k + 2] * d2; + sum += A[(j+1)*lda + k + 3] * d3; + C[(j+1)*lda +i] += sum; + + sum = A[(j+1)*lda + k] * c0; + sum += A[(j+1)*lda + k + 1] * c1; + sum += A[(j+1)*lda + k + 2] * c2; + sum += A[(j+1)*lda + k + 3] * c3; + C[(j+1)*lda + i + 1] += sum; + + sum = A[(j+2)*lda + k] * d0; + sum += A[(j+2)*lda + k + 1] * d1; + sum += A[(j+2)*lda + k + 2] * d2; + sum += A[(j+2)*lda + k + 3] * d3; + C[(j+2)*lda +i] += sum; + + sum = A[(j+2)*lda + k] * c0; + sum += A[(j+2)*lda + k + 1] * c1; + sum += A[(j+2)*lda + k + 2] * c2; + sum += A[(j+2)*lda + k + 3] * c3; + C[(j+2)*lda + i + 1] += sum; + + sum = A[(j+3)*lda + k] * d0; + sum += A[(j+3)*lda + k + 1] * d1; + sum += A[(j+3)*lda + k + 2] * d2; + sum += A[(j+3)*lda + k + 3] * d3; + C[(j+3)*lda +i] += sum; + + sum = A[(j+3)*lda + k] * c0; + sum += A[(j+3)*lda + k + 1] * c1; + sum += A[(j+3)*lda + k + 2] 
* c2; + sum += A[(j+3)*lda + k + 3] * c3; + C[(j+3)*lda + i + 1] += sum; + + } + barrier(ncores); + } + } +} diff --git a/mt/ci_matmul.c b/mt/ci_matmul.c new file mode 100755 index 0000000..3b7977d --- /dev/null +++ b/mt/ci_matmul.c @@ -0,0 +1,70 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + // ***************************** // + // **** ADD YOUR CODE HERE ***** // + // ***************************** // + // + // feel free to make a separate function for MI and MSI versions. + +//----------MSI-------------- +/* + int i,j,k; + barrier(nc); + for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) { + for(i = 0; i < lda; i+=4) { + data_t Cval0 = 0; + data_t Cval1 = 0; + data_t Cval2 = 0; + data_t Cval3 = 0; + for(k = 0; k < lda; k++) { + Cval0 += A[j*lda+k]*B[k*lda+i]; + Cval1 += A[j*lda+k]*B[k*lda+i+1]; + Cval2 += A[j*lda+k]*B[k*lda+i+2]; + Cval3 += A[j*lda+k]*B[k*lda+i+3]; + } + C[j*lda+i] = Cval0; + C[j*lda+i+1] = Cval1; + C[j*lda+i+2] = Cval2; + C[j*lda+i+3] = Cval3; + } + } +*/ + +//------------------MI------------------- + + int i,j,k; + barrier(ncores); + for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) { + for(i = 0; i < lda; i+=4) { + data_t Cval0 = 0; + data_t Cval1 = 0; + data_t Cval2 = 0; + data_t Cval3 = 0; + if(coreid == 0) { + for(k = 0; k < lda; k++) { + Cval0 += A[j*lda+k]*B[k*lda+i]; + Cval1 += A[j*lda+k]*B[k*lda+i+1]; + Cval2 += A[j*lda+k]*B[k*lda+i+2]; + Cval3 += A[j*lda+k]*B[k*lda+i+3]; + } + } else { + for(k = lda-1; k >= 0; k--) { + Cval0 += A[j*lda+k]*B[k*lda+i]; + Cval1 += A[j*lda+k]*B[k*lda+i+1]; + Cval2 += A[j*lda+k]*B[k*lda+i+2]; + Cval3 += A[j*lda+k]*B[k*lda+i+3]; + } + } + C[j*lda+i] = Cval0; + C[j*lda+i+1] = Cval1; + C[j*lda+i+2] = Cval2; + C[j*lda+i+3] = Cval3; + } + } +} diff --git a/mt/ck_matmul.c b/mt/ck_matmul.c new file mode 100755 
index 0000000..753a36d --- /dev/null +++ b/mt/ck_matmul.c @@ -0,0 +1,61 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + // ***************************** // + // **** ADD YOUR CODE HERE ***** // + // ***************************** // + // + // feel free to make a separate function for MI and MSI versions. + int i, j, k, ii, jj, bsize, start; + bsize = 16; + start = bsize*coreid; + for ( jj = start; jj < lda; jj += bsize*ncores) { + int first = 1; + for ( ii = start; ii !=start || first; ii=(bsize+ii) % lda) { + first = 0; + for ( j = jj; j < lda && j < jj + bsize; j+=4) { + for ( i = ii; i < lda && i < ii + bsize; i+=2) { + data_t c1 = C[i + j*lda]; + data_t c2 = C[i + j*lda + 1]; + data_t c3 = C[i + (j+1)*lda]; + data_t c4 = C[i + (j+1)*lda + 1]; + data_t c5 = C[i + (j+2)*lda]; + data_t c6 = C[i + (j+2)*lda + 1]; + data_t c7 = C[i + (j+3)*lda]; + data_t c8 = C[i + (j+3)*lda + 1]; + for ( k = 0; k < lda; k+=8){ + for (int x = 0; x < 8; x++) { + data_t a = A[j*lda + k+x]; + data_t a1 = A[(j+1)*lda +k+x]; + data_t a2 = A[(j+2)*lda +k+x]; + data_t a3 = A[(j+3)*lda +k+x]; + data_t b1 = B[(k+x)*lda + i]; + data_t b2 = B[(k+x)*lda + i + 1]; + c1 += a * b1; + c2 += a * b2; + c3 += a1* b1; + c4 += a1* b2; + c5 += a2* b1; + c6 += a2* b2; + c7 += a3* b1; + c8 += a3* b2; + } + } + C[i + j*lda] = c1; + C[i + j*lda + 1] = c2; + C[i + (j+1)*lda] = c3; + C[i + (j+1)*lda + 1] = c4; + C[i + (j+2)*lda] = c5; + C[i + (j+2)*lda + 1] = c6; + C[i + (j+3)*lda] = c7; + C[i + (j+3)*lda + 1] = c8; + } + } + } + } +} diff --git a/mt/cl_matmul.c b/mt/cl_matmul.c new file mode 100644 index 0000000..086a614 --- /dev/null +++ b/mt/cl_matmul.c @@ -0,0 +1,176 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], 
const data_t B[], data_t C[] ) +{ + if(coreid > 1) return; + // feel free to make a separate function for MI and MSI versions. + int i, j, k, x; + data_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; + data_t temp8, temp9, temp10, temp11, temp12, temp13, temp14, temp15; + + + if(coreid == 0) { + for(j = 0; j < 32; j++) { + temp0 = C[j*lda]; + temp1 = C[1 + j*lda]; + temp2 = C[2 + j*lda]; + temp3 = C[3 + j*lda]; + temp4 = C[4 + j*lda]; + temp5 = C[5 + j*lda]; + temp6 = C[6 + j*lda]; + temp7 = C[7 + j*lda]; + temp8 = C[8 + j*lda]; + temp9 = C[9 + j*lda]; + temp10 = C[10 + j*lda]; + temp11 = C[11 + j*lda]; + temp12 = C[12 + j*lda]; + temp13 = C[13 + j*lda]; + temp14 = C[14 + j*lda]; + temp15 = C[15 + j*lda]; + for(k = 0; k < 32; k++) { + temp0 += A[j*lda + k] * B[k*lda]; + temp1 += A[j*lda + k] * B[1 + k*lda]; + temp2 += A[j*lda + k] * B[2 + k*lda]; + temp3 += A[j*lda + k] * B[3 + k*lda]; + temp4 += A[j*lda + k] * B[4 + k*lda]; + temp5 += A[j*lda + k] * B[5 + k*lda]; + temp6 += A[j*lda + k] * B[6 + k*lda]; + temp7 += A[j*lda + k] * B[7 + k*lda]; + temp8 += A[j*lda + k] * B[8 + k*lda]; + temp9 += A[j*lda + k] * B[9 + k*lda]; + temp10 += A[j*lda + k] * B[10 + k*lda]; + temp11 += A[j*lda + k] * B[11 + k*lda]; + temp12 += A[j*lda + k] * B[12 + k*lda]; + temp13 += A[j*lda + k] * B[13 + k*lda]; + temp14 += A[j*lda + k] * B[14 + k*lda]; + temp15 += A[j*lda + k] * B[15 + k*lda]; + } + C[j*lda] = temp0; + C[1 + j*lda] = temp1; + C[2 + j*lda] = temp2; + C[3 + j*lda] = temp3; + C[4 + j*lda] = temp4; + C[5 + j*lda] = temp5; + C[6 + j*lda] = temp6; + C[7 + j*lda] = temp7; + C[8 + j*lda] = temp8; + C[9 + j*lda] = temp9; + C[10 + j*lda] = temp10; + C[11 + j*lda] = temp11; + C[12 + j*lda] = temp12; + C[13 + j*lda] = temp13; + C[14 + j*lda] = temp14; + C[15 + j*lda] = temp15; + } + } + + if(coreid == 1 || ncores == 1) { + for(j = 16; j < 32; j++) { + temp0 = C[16 + j*lda]; + temp1 = C[17 + j*lda]; + temp2 = C[18 + j*lda]; + temp3 = C[19 + j*lda]; + temp4 = C[20 + j*lda]; + 
temp5 = C[21 + j*lda]; + temp6 = C[22 + j*lda]; + temp7 = C[23 + j*lda]; + temp8 = C[24 + j*lda]; + temp9 = C[25 + j*lda]; + temp10 = C[26 + j*lda]; + temp11 = C[27 + j*lda]; + temp12 = C[28 + j*lda]; + temp13 = C[29 + j*lda]; + temp14 = C[30 + j*lda]; + temp15 = C[31 + j*lda]; + for(k = 0; k < 32; k++) { + temp0 += A[j*lda + k] * B[16 + k*lda]; + temp1 += A[j*lda + k] * B[17 + k*lda]; + temp2 += A[j*lda + k] * B[18 + k*lda]; + temp3 += A[j*lda + k] * B[19 + k*lda]; + temp4 += A[j*lda + k] * B[20 + k*lda]; + temp5 += A[j*lda + k] * B[21 + k*lda]; + temp6 += A[j*lda + k] * B[22 + k*lda]; + temp7 += A[j*lda + k] * B[23 + k*lda]; + temp8 += A[j*lda + k] * B[24 + k*lda]; + temp9 += A[j*lda + k] * B[25 + k*lda]; + temp10 += A[j*lda + k] * B[26 + k*lda]; + temp11 += A[j*lda + k] * B[27 + k*lda]; + temp12 += A[j*lda + k] * B[28 + k*lda]; + temp13 += A[j*lda + k] * B[29 + k*lda]; + temp14 += A[j*lda + k] * B[30 + k*lda]; + temp15 += A[j*lda + k] * B[31 + k*lda]; + } + C[16 + j*lda] = temp0; + C[17 + j*lda] = temp1; + C[18 + j*lda] = temp2; + C[19 + j*lda] = temp3; + C[20 + j*lda] = temp4; + C[21 + j*lda] = temp5; + C[22 + j*lda] = temp6; + C[23 + j*lda] = temp7; + C[24 + j*lda] = temp8; + C[25 + j*lda] = temp9; + C[26 + j*lda] = temp10; + C[27 + j*lda] = temp11; + C[28 + j*lda] = temp12; + C[29 + j*lda] = temp13; + C[30 + j*lda] = temp14; + C[31 + j*lda] = temp15; + } + for(j = 0; j <16; j++) { + temp0 = C[16 + j*lda]; + temp1 = C[17 + j*lda]; + temp2 = C[18 + j*lda]; + temp3 = C[19 + j*lda]; + temp4 = C[20 + j*lda]; + temp5 = C[21 + j*lda]; + temp6 = C[22 + j*lda]; + temp7 = C[23 + j*lda]; + temp8 = C[24 + j*lda]; + temp9 = C[25 + j*lda]; + temp10 = C[26 + j*lda]; + temp11 = C[27 + j*lda]; + temp12 = C[28 + j*lda]; + temp13 = C[29 + j*lda]; + temp14 = C[30 + j*lda]; + temp15 = C[31 + j*lda]; + for(k = 0; k < 32; k++) { + temp0 += A[j*lda + k] * B[16 + k*lda]; + temp1 += A[j*lda + k] * B[17 + k*lda]; + temp2 += A[j*lda + k] * B[18 + k*lda]; + temp3 += A[j*lda + k] * B[19 + 
k*lda]; + temp4 += A[j*lda + k] * B[20 + k*lda]; + temp5 += A[j*lda + k] * B[21 + k*lda]; + temp6 += A[j*lda + k] * B[22 + k*lda]; + temp7 += A[j*lda + k] * B[23 + k*lda]; + temp8 += A[j*lda + k] * B[24 + k*lda]; + temp9 += A[j*lda + k] * B[25 + k*lda]; + temp10 += A[j*lda + k] * B[26 + k*lda]; + temp11 += A[j*lda + k] * B[27 + k*lda]; + temp12 += A[j*lda + k] * B[28 + k*lda]; + temp13 += A[j*lda + k] * B[29 + k*lda]; + temp14 += A[j*lda + k] * B[30 + k*lda]; + temp15 += A[j*lda + k] * B[31 + k*lda]; + } + C[16 + j*lda] = temp0; + C[17 + j*lda] = temp1; + C[18 + j*lda] = temp2; + C[19 + j*lda] = temp3; + C[20 + j*lda] = temp4; + C[21 + j*lda] = temp5; + C[22 + j*lda] = temp6; + C[23 + j*lda] = temp7; + C[24 + j*lda] = temp8; + C[25 + j*lda] = temp9; + C[26 + j*lda] = temp10; + C[27 + j*lda] = temp11; + C[28 + j*lda] = temp12; + C[29 + j*lda] = temp13; + C[30 + j*lda] = temp14; + C[31 + j*lda] = temp15; + } + } +} diff --git a/mt/cm_matmul.c b/mt/cm_matmul.c new file mode 100755 index 0000000..ae52b27 --- /dev/null +++ b/mt/cm_matmul.c @@ -0,0 +1,91 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + int i,j,k,l; + data_t element1, element2, element3, element4, element5, element6, element7, element8; + int row, row2; + int column1, column2, column3, column4, column5, column6, column7, column8; + data_t temp[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + data_t temp2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + if (coreid == 0){ + for (i=0; i 1) return; + static __thread int i, j, k; + static __thread data_t tempA0, tempA1, tempA2, tempA3, tempA4, tempA5, tempA6, tempA7; + static __thread data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7, tempC8, tempC9, tempC10, tempC11, tempC12, tempC13, tempC14, tempC15; + + static __thread int 
start, end, jStride, jToRow, jToCol; + static data_t A1[1024], B1[1024];; + + start = coreid << 9; + end = ((ncores == 1) ? 2 :(coreid+1)) << 9; + jStride = 8; + + if (coreid == 0) { + for (j=start; j < end; j+=jStride) { + jToRow = (j>>5)<<5; + jToCol = j%32; + tempC0 = 0; + tempC1 = 0; + tempC2 = 0; + tempC3 = 0; + tempC4 = 0; + tempC5 = 0; + tempC6 = 0; + tempC7 = 0; + for ( i=0; i < lda; i+=2 ) { + tempA0 = A[i + jToRow]; + tempA1 = A[i+1 + jToRow]; + tempC0 += tempA0 * B[(jToCol ) + (i<<5)]; + tempC1 += tempA0 * B[(jToCol+1 ) + (i<<5)]; + tempC2 += tempA0 * B[(jToCol+2 ) + (i<<5)]; + tempC3 += tempA0 * B[(jToCol+3 ) + (i<<5)]; + tempC4 += tempA0 * B[(jToCol+4 ) + (i<<5)]; + tempC5 += tempA0 * B[(jToCol+5 ) + (i<<5)]; + tempC6 += tempA0 * B[(jToCol+6 ) + (i<<5)]; + tempC7 += tempA0 * B[(jToCol+7 ) + (i<<5)]; + tempC0 += tempA1 * B[(jToCol ) + ((i+1)<<5)]; + tempC1 += tempA1 * B[(jToCol+1 ) + ((i+1)<<5)]; + tempC2 += tempA1 * B[(jToCol+2 ) + ((i+1)<<5)]; + tempC3 += tempA1 * B[(jToCol+3 ) + ((i+1)<<5)]; + tempC4 += tempA1 * B[(jToCol+4 ) + ((i+1)<<5)]; + tempC5 += tempA1 * B[(jToCol+5 ) + ((i+1)<<5)]; + tempC6 += tempA1 * B[(jToCol+6 ) + ((i+1)<<5)]; + tempC7 += tempA1 * B[(jToCol+7 ) + ((i+1)<<5)]; + } + C[j] =tempC0; + C[j + 1 ]=tempC1; + C[j + 2 ]=tempC2; + C[j + 3 ]=tempC3; + C[j + 4 ]=tempC4; + C[j + 5 ]=tempC5; + C[j + 6 ]=tempC6; + C[j + 7 ]=tempC7; + } + } + else { + for (i = 0; i < 1024; i++) { + A1[i] = A[i]; + B1[i] = B[i]; + } + for (j=start; j < end; j+=jStride) { + jToRow = (j>>5)<<5; + jToCol = j%32; + tempC0 = 0; + tempC1 = 0; + tempC2 = 0; + tempC3 = 0; + tempC4 = 0; + tempC5 = 0; + tempC6 = 0; + tempC7 = 0; + for ( i=0; i < lda; i+=2 ) { + tempA0 = A1[i + jToRow]; + tempA1 = A1[i+1 + jToRow]; + tempC0 += tempA0 * B1[(jToCol ) + (i<<5)]; + tempC1 += tempA0 * B1[(jToCol+1 ) + (i<<5)]; + tempC2 += tempA0 * B1[(jToCol+2 ) + (i<<5)]; + tempC3 += tempA0 * B1[(jToCol+3 ) + (i<<5)]; + tempC4 += tempA0 * B1[(jToCol+4 ) + (i<<5)]; + tempC5 += tempA0 * 
B1[(jToCol+5 ) + (i<<5)]; + tempC6 += tempA0 * B1[(jToCol+6 ) + (i<<5)]; + tempC7 += tempA0 * B1[(jToCol+7 ) + (i<<5)]; + tempC0 += tempA1 * B1[(jToCol ) + ((i+1)<<5)]; + tempC1 += tempA1 * B1[(jToCol+1 ) + ((i+1)<<5)]; + tempC2 += tempA1 * B1[(jToCol+2 ) + ((i+1)<<5)]; + tempC3 += tempA1 * B1[(jToCol+3 ) + ((i+1)<<5)]; + tempC4 += tempA1 * B1[(jToCol+4 ) + ((i+1)<<5)]; + tempC5 += tempA1 * B1[(jToCol+5 ) + ((i+1)<<5)]; + tempC6 += tempA1 * B1[(jToCol+6 ) + ((i+1)<<5)]; + tempC7 += tempA1 * B1[(jToCol+7 ) + ((i+1)<<5)]; + } + C[j] =tempC0; + C[j + 1 ]=tempC1; + C[j + 2 ]=tempC2; + C[j + 3 ]=tempC3; + C[j + 4 ]=tempC4; + C[j + 5 ]=tempC5; + C[j + 6 ]=tempC6; + C[j + 7 ]=tempC7; + } + } +} diff --git a/mt/dc_matmul.c b/mt/dc_matmul.c new file mode 100755 index 0000000..a2b583e --- /dev/null +++ b/mt/dc_matmul.c @@ -0,0 +1,168 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" + +#define REG_I 8 +#define REG_J 2 +#define BLOCK_I 32 +#define BLOCK_J 16 +#define BLOCK_K 16 +#define LDA 32 +#define NCORES 2 +#define MIN(X,Y) (X < Y ? X : Y) + +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + // ***************************** // + // **** ADD YOUR CODE HERE ***** // + // ***************************** // + // + // feel free to make a separate function for MI and MSI versions. + + int i, j, k, ri, rj, ii, jj, kk; + data_t *Aj, *Cj, *Bi; + data_t c[REG_I][REG_J], a[REG_J], b[REG_I]; + size_t start = coreid * (LDA / NCORES), end = (coreid == NCORES - 1 ? LDA : (coreid + 1) * (LDA / NCORES)); + + /* if (coreid > 0) { */ + /* return; */ + /* } */ + /* start = 0, end = lda; */ + if (ncores == NCORES && lda == LDA) { + for (jj = start; jj < end; jj += BLOCK_J) { + int kk_start= (coreid == 0 ? 0 : LDA/2) ,kk_end = (coreid == 0 ? 
LDA/2 : LDA); + for (kk = kk_start; kk < kk_end; kk += BLOCK_K) { + // for (ii = 0; ii < LDA; ii += BLOCK_I) + for (j = jj; j < MIN(end, jj + BLOCK_J); j += REG_J) { + Aj = A + j*LDA; + Cj = C + j*LDA; + for (i = 0; i < LDA/*, ii + BLOCK_I)*/; i += REG_I) { + /* Load C in register blocks. */ + Bi = B + i; + for (ri = 0; ri < REG_I; ri++) { + for (rj = 0; rj < REG_J; rj++) { + c[ri][rj] = Cj[i + ri + ( rj)*LDA]; + } + } + + + for (k = kk; k < MIN(LDA, kk + BLOCK_K); k++) { + for (ri = 0; ri < REG_I; ri++) { + b[ri] = Bi[k*LDA + ri]; + } + /* Compute C in register blocks. */ + for (rj = 0; rj < REG_J; rj++) { + a[rj] = Aj[(rj)*LDA + k]; + for (ri = 0; ri < REG_I; ri++) { + c[ri][rj] += a[rj] * b[ri]; + } + } + } + + /* store C in register blocks. */ + for (ri = 0; ri < REG_I; ri++) { + for (rj = 0; rj < REG_J; rj++) { + Cj[i + ri + ( rj)*LDA] = c[ri][rj]; + } + } + } + } + /* barrier(nc); */ + + /* kk_start= (coreid == 1 ? 0 : LDA/2); */ + /* kk_end = (coreid == 1 ? LDA/2 : LDA); */ + /* for (kk = kk_start; kk < kk_end; kk += BLOCK_K) { */ + /* // for (ii = 0; ii < LDA; ii += BLOCK_I) */ + /* for (j = jj; j < MIN(end, jj + BLOCK_J); j += REG_J) { */ + /* Aj = A + j*LDA; */ + /* Cj = C + j*LDA; */ + /* for (i = 0; i < LDA/\*, ii + BLOCK_I)*\/; i += REG_I) { */ + /* /\* Load C in register blocks. *\/ */ + /* Bi = B + i; */ + /* for (ri = 0; ri < REG_I; ri++) { */ + /* for (rj = 0; rj < REG_J; rj++) { */ + /* c[ri][rj] = Cj[i + ri + ( rj)*LDA]; */ + /* } */ + /* } */ + + + /* for (k = kk; k < MIN(LDA, kk + BLOCK_K); k++) { */ + /* for (ri = 0; ri < REG_I; ri++) { */ + /* b[ri] = Bi[k*LDA + ri]; */ + /* } */ + /* /\* Compute C in register blocks. *\/ */ + /* for (rj = 0; rj < REG_J; rj++) { */ + /* a[rj] = Aj[(rj)*LDA + k]; */ + /* for (ri = 0; ri < REG_I; ri++) { */ + /* c[ri][rj] += a[rj] * b[ri]; */ + /* } */ + /* } */ + /* } */ + + /* store C in register blocks. 
*/ + /* for (ri = 0; ri < REG_I; ri++) { */ + /* for (rj = 0; rj < REG_J; rj++) { */ + /* Cj[i + ri + ( rj)*LDA] = c[ri][rj]; */ + /* } */ + /* } */ + /* } */ + /* } */ + } + } + + + //barrier(nc); + for (jj = start; jj < end; jj += BLOCK_J) { + int kk_start= (coreid != 0 ? 0 : LDA/2), kk_end = (coreid != 0 ? LDA/2 : LDA); + for (kk = kk_start; kk < kk_end; kk += BLOCK_K) { + // for (ii = 0; ii < LDA; ii += BLOCK_I) + for (j = jj; j < MIN(end, jj + BLOCK_J); j += REG_J) { + Aj = A + j*LDA; + Cj = C + j*LDA; + for (i = 0; i < LDA/*, ii + BLOCK_I)*/; i += REG_I) { + /* Load C in register blocks. */ + Bi = B + i; + for (ri = 0; ri < REG_I; ri++) { + for (rj = 0; rj < REG_J; rj++) { + c[ri][rj] = Cj[i + ri + ( rj)*LDA]; + } + } + + + for (k = kk; k < MIN(LDA, kk + BLOCK_K); k++) { + for (ri = 0; ri < REG_I; ri++) { + b[ri] = Bi[k*LDA + ri]; + } + /* Compute C in register blocks. */ + for (rj = 0; rj < REG_J; rj++) { + a[rj] = Aj[(rj)*LDA + k]; + for (ri = 0; ri < REG_I; ri++) { + c[ri][rj] += a[rj] * b[ri]; + } + } + } + + /* store C in register blocks. */ + for (ri = 0; ri < REG_I; ri++) { + for (rj = 0; rj < REG_J; rj++) { + Cj[i + ri + ( rj)*LDA] = c[ri][rj]; + } + } + } + } + } + } + /* We only care about performance for 32x32 matrices and 2 cores. 
Otherwise just naive mat_mul */ +} else { + if (coreid > 0) + return; + + for ( i = 0; i < lda; i++ ) + for ( j = 0; j < lda; j++ ) + for ( k = 0; k < lda; k++ ) + C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; + } + } diff --git a/mt/df_matmul.c b/mt/df_matmul.c new file mode 100755 index 0000000..56d9c4b --- /dev/null +++ b/mt/df_matmul.c @@ -0,0 +1,237 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + // ***************************** // + // **** ADD YOUR CODE HERE ***** // + // ***************************** // + // + // feel free to make a separate function for MI and MSI versions. + int j, k; + data_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; + data_t temp8, temp9, temp10, temp11, temp12, temp13, temp14, temp15; + if(coreid == 0) { + //16*0:16*(0+1) ;; 16*1+16*(1+1) + //0:16 ;; 16:32 + + //complete Q1 + for(j = 0; j < 16; j++) { + temp0 = C[j*lda]; + temp1 = C[1 + j*lda]; + temp2 = C[2 + j*lda]; + temp3 = C[3 + j*lda]; + temp4 = C[4 + j*lda]; + temp5 = C[5 + j*lda]; + temp6 = C[6 + j*lda]; + temp7 = C[7 + j*lda]; + temp8 = C[8 + j*lda]; + temp9 = C[9 + j*lda]; + temp10 = C[10 + j*lda]; + temp11 = C[11 + j*lda]; + temp12 = C[12 + j*lda]; + temp13 = C[13 + j*lda]; + temp14 = C[14 + j*lda]; + temp15 = C[15 + j*lda]; + for(k = 0; k < 32; k++) { + temp0 += A[j*lda + k] * B[k*lda]; + temp1 += A[j*lda + k] * B[1+k*lda]; + temp2 += A[j*lda + k] * B[2+k*lda]; + temp3 += A[j*lda + k] * B[3+k*lda]; + temp4 += A[j*lda + k] * B[4+k*lda]; + temp5 += A[j*lda + k] * B[5+k*lda]; + temp6 += A[j*lda + k] * B[6+k*lda]; + temp7 += A[j*lda + k] * B[7+k*lda]; + temp8 += A[j*lda + k] * B[8+k*lda]; + temp9 += A[j*lda + k] * B[9+k*lda]; + temp10 += A[j*lda + k] * B[10+k*lda]; + temp11 += A[j*lda + k] * B[11+k*lda]; + temp12 += A[j*lda + k] * B[12+k*lda]; + temp13 += A[j*lda + k] * B[13+k*lda]; + temp14 += 
A[j*lda + k] * B[14+k*lda]; + temp15 += A[j*lda + k] * B[15+k*lda]; + } + C[j*lda] = temp0; + C[1 + j*lda] = temp1; + C[2 + j*lda] = temp2; + C[3 + j*lda] = temp3; + C[4 + j*lda] = temp4; + C[5 + j*lda] = temp5; + C[6 + j*lda] = temp6; + C[7 + j*lda] = temp7; + C[8 + j*lda] = temp8; + C[9 + j*lda] = temp9; + C[10 + j*lda] = temp10; + C[11 + j*lda] = temp11; + C[12 + j*lda] = temp12; + C[13 + j*lda] = temp13; + C[14 + j*lda] = temp14; + C[15 + j*lda] = temp15; + } + for(j = 16; j < 32; j++) { + temp0 = C[j*lda]; + temp1 = C[1 + j*lda]; + temp2 = C[2 + j*lda]; + temp3 = C[3 + j*lda]; + temp4 = C[4 + j*lda]; + temp5 = C[5 + j*lda]; + temp6 = C[6 + j*lda]; + temp7 = C[7 + j*lda]; + temp8 = C[8 + j*lda]; + temp9 = C[9 + j*lda]; + temp10 = C[10 + j*lda]; + temp11 = C[11 + j*lda]; + temp12 = C[12 + j*lda]; + temp13 = C[13 + j*lda]; + temp14 = C[14 + j*lda]; + temp15 = C[15 + j*lda]; + for(k = 0; k < 32; k++) { + temp0 += A[j*lda + k] * B[k*lda]; + temp1 += A[j*lda + k] * B[1+k*lda]; + temp2 += A[j*lda + k] * B[2+k*lda]; + temp3 += A[j*lda + k] * B[3+k*lda]; + temp4 += A[j*lda + k] * B[4+k*lda]; + temp5 += A[j*lda + k] * B[5+k*lda]; + temp6 += A[j*lda + k] * B[6+k*lda]; + temp7 += A[j*lda + k] * B[7+k*lda]; + temp8 += A[j*lda + k] * B[8+k*lda]; + temp9 += A[j*lda + k] * B[9+k*lda]; + temp10 += A[j*lda + k] * B[10+k*lda]; + temp11 += A[j*lda + k] * B[11+k*lda]; + temp12 += A[j*lda + k] * B[12+k*lda]; + temp13 += A[j*lda + k] * B[13+k*lda]; + temp14 += A[j*lda + k] * B[14+k*lda]; + temp15 += A[j*lda + k] * B[15+k*lda]; + } + C[j*lda] = temp0; + C[1 + j*lda] = temp1; + C[2 + j*lda] = temp2; + C[3 + j*lda] = temp3; + C[4 + j*lda] = temp4; + C[5 + j*lda] = temp5; + C[6 + j*lda] = temp6; + C[7 + j*lda] = temp7; + C[8 + j*lda] = temp8; + C[9 + j*lda] = temp9; + C[10 + j*lda] = temp10; + C[11 + j*lda] = temp11; + C[12 + j*lda] = temp12; + C[13 + j*lda] = temp13; + C[14 + j*lda] = temp14; + C[15 + j*lda] = temp15; + } + } + //16*(2-1) : 16*2 ;; 16*(1-1) : 16*1 + //16:32 ;; 0:16 + 
if(coreid == 1 || ncores == 1) { + //complete Q3 + for(j = 16; j < 32; j++) { + temp0 = C[16+j*lda]; + temp1 = C[17+j*lda]; + temp2 = C[18+j*lda]; + temp3 = C[19+j*lda]; + temp4 = C[20+j*lda]; + temp5 = C[21+j*lda]; + temp6 = C[22+j*lda]; + temp7 = C[23+j*lda]; + temp8 = C[24+j*lda]; + temp9 = C[25+j*lda]; + temp10 = C[26+j*lda]; + temp11 = C[27+j*lda]; + temp12 = C[28+j*lda]; + temp13 = C[29+j*lda]; + temp14 = C[30+j*lda]; + temp15 = C[31+j*lda]; + for(k = 0; k < 32; k++) { + temp0 += A[j*lda + k] * B[16+k*lda]; + temp1 += A[j*lda + k] * B[17+k*lda]; + temp2 += A[j*lda + k] * B[18+k*lda]; + temp3 += A[j*lda + k] * B[19+k*lda]; + temp4 += A[j*lda + k] * B[20+k*lda]; + temp5 += A[j*lda + k] * B[21+k*lda]; + temp6 += A[j*lda + k] * B[22+k*lda]; + temp7 += A[j*lda + k] * B[23+k*lda]; + temp8 += A[j*lda + k] * B[24+k*lda]; + temp9 += A[j*lda + k] * B[25+k*lda]; + temp10 += A[j*lda + k] * B[26+k*lda]; + temp11 += A[j*lda + k] * B[27+k*lda]; + temp12 += A[j*lda + k] * B[28+k*lda]; + temp13 += A[j*lda + k] * B[29+k*lda]; + temp14 += A[j*lda + k] * B[30+k*lda]; + temp15 += A[j*lda + k] * B[31+k*lda]; + } + C[16 + j*lda] = temp0; + C[17 + j*lda] = temp1; + C[18 + j*lda] = temp2; + C[19 + j*lda] = temp3; + C[20 + j*lda] = temp4; + C[21 + j*lda] = temp5; + C[22 + j*lda] = temp6; + C[23 + j*lda] = temp7; + C[24 + j*lda] = temp8; + C[25 + j*lda] = temp9; + C[26 + j*lda] = temp10; + C[27 + j*lda] = temp11; + C[28 + j*lda] = temp12; + C[29 + j*lda] = temp13; + C[30 + j*lda] = temp14; + C[31 + j*lda] = temp15; + } + //complete Q4 + for(j = 0; j < 16; j++) { + temp0 = C[16 + j*lda]; + temp1 = C[17 + j*lda]; + temp2 = C[18 + j*lda]; + temp3 = C[19 + j*lda]; + temp4 = C[20 + j*lda]; + temp5 = C[21 + j*lda]; + temp6 = C[22 + j*lda]; + temp7 = C[23 + j*lda]; + temp8 = C[24 + j*lda]; + temp9 = C[25 + j*lda]; + temp10 = C[26 + j*lda]; + temp11 = C[27 + j*lda]; + temp12 = C[28 + j*lda]; + temp13 = C[29 + j*lda]; + temp14 = C[30 + j*lda]; + temp15 = C[31 + j*lda]; + for(k = 0; k < 32; k++) 
{ + temp0 += A[j*lda + k] * B[16 + k*lda]; + temp1 += A[j*lda + k] * B[17 + k*lda]; + temp2 += A[j*lda + k] * B[18 + k*lda]; + temp3 += A[j*lda + k] * B[19 + k*lda]; + temp4 += A[j*lda + k] * B[20 + k*lda]; + temp5 += A[j*lda + k] * B[21 + k*lda]; + temp6 += A[j*lda + k] * B[22 + k*lda]; + temp7 += A[j*lda + k] * B[23 + k*lda]; + temp8 += A[j*lda + k] * B[24 + k*lda]; + temp9 += A[j*lda + k] * B[25 + k*lda]; + temp10 += A[j*lda + k] * B[26 + k*lda]; + temp11 += A[j*lda + k] * B[27 + k*lda]; + temp12 += A[j*lda + k] * B[28 + k*lda]; + temp13 += A[j*lda + k] * B[29 + k*lda]; + temp14 += A[j*lda + k] * B[30 + k*lda]; + temp15 += A[j*lda + k] * B[31 + k*lda]; + } + C[16 + j*lda] = temp0; + C[17 + j*lda] = temp1; + C[18 + j*lda] = temp2; + C[19 + j*lda] = temp3; + C[20 + j*lda] = temp4; + C[21 + j*lda] = temp5; + C[22 + j*lda] = temp6; + C[23 + j*lda] = temp7; + C[24 + j*lda] = temp8; + C[25 + j*lda] = temp9; + C[26 + j*lda] = temp10; + C[27 + j*lda] = temp11; + C[28 + j*lda] = temp12; + C[29 + j*lda] = temp13; + C[30 + j*lda] = temp14; + C[31 + j*lda] = temp15; + } + } +} diff --git a/mt/dm_matmul.c b/mt/dm_matmul.c new file mode 100644 index 0000000..1a777d9 --- /dev/null +++ b/mt/dm_matmul.c @@ -0,0 +1,196 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + // ***************************** // + // **** ADD YOUR CODE HERE ***** // + // ***************************** // + // + // feel free to make a separate function for MI and MSI versions. 
+ int i, j, k; + int space=lda/ncores; + int max= space*coreid+space; + static data_t B1[32*32]; + if (coreid==ncores-1){ + for (i=0; i 1) return; + if (coreid || ncores == 1) { +// for ( ii = 0; ii < 32; ii+=IC ) + for ( kk = 0; kk < 32; kk+=16 ) + for ( j = 0; j < 16; j++ ) +// for ( j = 0; j < 16; j++ ) + { + for ( i = 0; i < 32; i+=8 ) +// for ( i = ii; i < ii + IC && i < 32; i+=8 ) + { + data_t temp0 = C[i+j*32]; + data_t temp1 = C[i+j*32+1]; + data_t temp2 = C[i+j*32+2]; + data_t temp3 = C[i+j*32+3]; + data_t temp4 = C[i+j*32+4]; + data_t temp5 = C[i+j*32+5]; + data_t temp6 = C[i+j*32+6]; + data_t temp7 = C[i+j*32+7]; + for ( k = kk; k < kk+16 && k < 32; k++ ) +// for ( k = 0; k < 32; k++ ) + { + data_t tempA = A[j*32+k]; + temp0 += tempA * B[k*32 + i]; + temp1 += tempA * B[k*32 + i+1]; + temp2 += tempA * B[k*32 + i+2]; + temp3 += tempA * B[k*32 + i+3]; + temp4 += tempA * B[k*32 + i+4]; + temp5 += tempA * B[k*32 + i+5]; + temp6 += tempA * B[k*32 + i+6]; + temp7 += tempA * B[k*32 + i+7]; + } + C[i+j*32] = temp0; + C[i+j*32+1] = temp1; + C[i+j*32+2] = temp2; + C[i+j*32+3] = temp3; + C[i+j*32+4] = temp4; + C[i+j*32+5] = temp5; + C[i+j*32+6] = temp6; + C[i+j*32+7] = temp7; + } + } + } + if(coreid == 0){ +// for ( ii = 0; ii < 32; ii+=IC ) + for ( kk = 0; kk < 32; kk+=16 ) + for ( j = 16; j < 32; j++ ) +// for ( j = 16; j < 32; j++ ) + { + for ( i = 0; i < 32; i+=8 ) +// for ( i = ii; i < ii + IC && i < 32; i+=8 ) + { + data_t temp0 = C[i+j*32]; + data_t temp1 = C[i+j*32+1]; + data_t temp2 = C[i+j*32+2]; + data_t temp3 = C[i+j*32+3]; + data_t temp4 = C[i+j*32+4]; + data_t temp5 = C[i+j*32+5]; + data_t temp6 = C[i+j*32+6]; + data_t temp7 = C[i+j*32+7]; + for ( k = kk; k < kk+16 && k < 32; k++ ) + { + data_t tempA = A[j*32+k]; + temp0 += tempA * B[k*32 + i]; + temp1 += tempA * B[k*32 + i+1]; + temp2 += tempA * B[k*32 + i+2]; + temp3 += tempA * B[k*32 + i+3]; + temp4 += tempA * B[k*32 + i+4]; + temp5 += tempA * B[k*32 + i+5]; + temp6 += tempA * B[k*32 + i+6]; + temp7 
+= tempA * B[k*32 + i+7]; + } + C[i+j*32] = temp0; + C[i+j*32+1] = temp1; + C[i+j*32+2] = temp2; + C[i+j*32+3] = temp3; + C[i+j*32+4] = temp4; + C[i+j*32+5] = temp5; + C[i+j*32+6] = temp6; + C[i+j*32+7] = temp7; + } + + } + } +} diff --git a/mt/matmul/dataset.h b/mt/matmul/dataset.h deleted file mode 100755 index dde3ee4..0000000 --- a/mt/matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 
3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 
3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 
1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 
72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 
89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/matmul/matmul.c b/mt/matmul/matmul.c deleted file mode 100755 index b009d26..0000000 --- a/mt/matmul/matmul.c +++ /dev/null @@ -1,167 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda 
+ i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - - // Execute the provided, naive matmul - barrier(nc); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - - // clear results from the first trial - size_t i; - if (coreid == 0) - for (i=0; i < ARRAY_SIZE; i++) - results_data[i] = 0; - barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/matmul/matmul_gendata.pl b/mt/matmul/matmul_gendata.pl deleted file mode 100755 index f21bb46..0000000 --- a/mt/matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul 
benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - 
-#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. ($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." 
\n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/mt-matmul/bmark.mk b/mt/mt-matmul/bmark.mk deleted file mode 100644 index 67d6af3..0000000 --- a/mt/mt-matmul/bmark.mk +++ /dev/null @@ -1,29 +0,0 @@ -#======================================================================= -# UCB CS250 Makefile fragment for benchmarks -#----------------------------------------------------------------------- -# -# Each benchmark directory should have its own fragment which -# essentially lists what the source files are and how to link them -# into an riscv and/or host executable. All variables should include -# the benchmark name as a prefix so that they are unique. -# - -mt_matmul_c_src = \ - mt-matmul.c \ - -mt_matmul_riscv_src = \ - crt-mt.S \ - -mt_matmul_c_objs = $(patsubst %.c, %.o, $(mt_matmul_c_src)) -mt_matmul_riscv_objs = $(patsubst %.S, %.o, $(mt_matmul_riscv_src)) - -mt_matmul_host_bin = mt-matmul.host -$(mt_matmul_host_bin) : $(mt_matmul_c_src) - $(HOST_COMP) $^ -o $(mt_matmul_host_bin) - -mt_matmul_riscv_bin = mt-matmul.riscv -$(mt_matmul_riscv_bin) : $(mt_matmul_c_objs) $(mt_matmul_riscv_objs) - $(RISCV_LINK_MT) $(RISCV_LINK_SYSCALL) $(mt_matmul_c_objs) $(mt_matmul_riscv_objs) -o $(mt_matmul_riscv_bin) - -junk += $(mt_matmul_c_objs) $(mt_matmul_riscv_objs) \ - $(mt_matmul_host_bin) $(mt_matmul_riscv_bin) diff --git a/mt/mt-matmul/dataset.h b/mt/mt-matmul/dataset.h deleted file mode 100644 index dde3ee4..0000000 --- a/mt/mt-matmul/dataset.h +++ /dev/null @@ -1,174 +0,0 @@ - -#define ARRAY_SIZE 1024 - - -#define DIM_SIZE 32 - -static data_t input1_data[ARRAY_SIZE] = -{ - 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, - 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0, - 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0, - 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 
2, 1, 0, 3, - 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1, - 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2, - 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2, - 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1, - 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3, - 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1, - 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1, - 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3, - 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3, - 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3, - 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0, - 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0, - 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0, - 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2, - 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3, - 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1, - 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2, - 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0, - 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3, - 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3, - 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1, - 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3, - 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0, - 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2, - 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, - 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3, - 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2, - 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1, - 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3, - 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0, - 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2, - 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3, - 
3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3, - 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, - 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0, - 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0, - 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0, - 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0, - 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, - 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0, - 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1, - 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2, - 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1, - 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1, - 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3, - 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3, - 3, 2, 2, 1 -}; - -static data_t input2_data[ARRAY_SIZE] = -{ - 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2, - 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2, - 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3, - 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2, - 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1, - 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2, - 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1, - 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2, - 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2, - 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1, - 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3, - 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0, - 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3, - 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0, - 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2, - 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1, - 2, 2, 1, 1, 2, 
2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3, - 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0, - 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0, - 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1, - 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0, - 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3, - 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1, - 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0, - 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2, - 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0, - 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2, - 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1, - 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0, - 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0, - 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0, - 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2, - 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2, - 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0, - 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0, - 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1, - 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3, - 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1, - 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2, - 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0, - 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0, - 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1, - 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0, - 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2, - 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1, - 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0, - 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2, - 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 
0, 0, 1, 2, 2, 0, 3, 1, 1, 1, - 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0, - 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0, - 1, 1, 2, 1 -}; - -static data_t verify_data[ARRAY_SIZE] = -{ - 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83, - 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54, - 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50, - 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64, - 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95, - 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68, - 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73, - 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77, - 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70, - 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53, - 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66, - 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70, - 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83, - 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68, - 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48, - 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47, - 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69, - 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42, - 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48, - 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101, - 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90, - 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 
77, 84, 93, 72, 99, 75, 85, 65, - 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64, - 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69, - 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92, - 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63, - 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64, - 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75, - 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89, - 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58, - 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54, - 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70, - 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97, - 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42, - 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52, - 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91, - 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89, - 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53, - 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56, - 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68, - 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88, - 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51, - 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59, - 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62, - 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89, - 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 
81, 84, 82, 85, 48, - 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70, - 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82, - 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82, - 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67, - 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71, - 69, 73, 94, 89 -}; - diff --git a/mt/mt-matmul/matmul_gendata.pl b/mt/mt-matmul/matmul_gendata.pl deleted file mode 100755 index f21bb46..0000000 --- a/mt/mt-matmul/matmul_gendata.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# matmul_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set and the reference data -# for the matmul benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: matmul_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my 
$arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[ARRAY_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3d",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3d",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - } - - print "};\n\n"; -} - - - -#-------------------------------------------------------------------------- -# Matmul -#-------------------------------------------------------------------------- - -# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/ - -sub mmult { - my ($m1,$m2) = @_; - my ($m1rows,$m1cols) = matdim($m1); - my ($m2rows,$m2cols) = matdim($m2); - - my $result = [ ]; - my ($i, $j, $k); - - for $i (range($m1rows)) { - for $j (range($m2cols)) { - for $k (range($m1cols)) { - $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; - } - } - } - return $result; -} - -sub range { 0 .. 
($_[0] - 1) } - - -sub veclen { - my $ary_ref = $_[0]; - my $type = ref $ary_ref; - if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" } - return scalar(@$ary_ref); -} - -sub matdim { - my $matrix = $_[0]; - my $rows = veclen($matrix); - my $cols = veclen($matrix->[0]); - return ($rows, $cols); -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - # create random input arrays - my $mat_values1; - my $mat_values2; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - $mat_values1->[$i][$j] = int(rand(4)); - $mat_values2->[$i][$j] = int(rand(4)); - } - } - - # perform matmul - my $mat_results = mmult( $mat_values1, $mat_values2 ); - - # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris) - my @values1; - my @values2; - my @results; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - for ( my $j = 0; $j < $opts{"size"}; $j++ ) { - my $value1 = $mat_values1->[$i][$j]; - my $value2 = $mat_values2->[$i][$j]; - my $result = $mat_results->[$i][$j]; - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @results, $result ); - } - } - - print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n"; - print "\n\#define DIM_SIZE ".$opts{"size"}." 
\n\n"; - - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@results); - -} - -main(); - diff --git a/mt/mt-matmul/mt-matmul.c b/mt/mt-matmul/mt-matmul.c deleted file mode 100644 index b009d26..0000000 --- a/mt/mt-matmul/mt-matmul.c +++ /dev/null @@ -1,167 +0,0 @@ -//************************************************************************** -// Multi-threaded Matrix Multiply benchmark -//-------------------------------------------------------------------------- -// TA : Christopher Celio -// Student: -// -// -// This benchmark multiplies two 2-D arrays together and writes the results to -// a third vector. The input data (and reference data) should be generated -// using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. - - -// print out arrays, etc. -//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for 
( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// matmul function - -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - - -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). 
- -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[ARRAY_SIZE]; - - - // Execute the provided, naive matmul - barrier(nc); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - - // clear results from the first trial - size_t i; - if (coreid == 0) - for (i=0; i < ARRAY_SIZE; i++) - results_data[i] = 0; - barrier(nc); - - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results:", ARRAY_SIZE, results_data); - printArrayMT("verify :", ARRAY_SIZE, verify_data); -#endif - - // verify - verifyMT(ARRAY_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/mt-vvadd/bmark.mk b/mt/mt-vvadd/bmark.mk deleted file mode 100644 index 0ab2504..0000000 --- a/mt/mt-vvadd/bmark.mk +++ /dev/null @@ -1,29 +0,0 @@ -#======================================================================= -# UCB CS250 Makefile fragment for benchmarks -#----------------------------------------------------------------------- -# -# Each benchmark directory should have its own fragment which -# essentially lists what the source files are and how to link them -# into an riscv and/or host executable. All variables should include -# the benchmark name as a prefix so that they are unique. 
-# - -mt_vvadd_c_src = \ - mt-vvadd.c \ - -mt_vvadd_riscv_src = \ - crt-mt.S \ - -mt_vvadd_c_objs = $(patsubst %.c, %.o, $(mt_vvadd_c_src)) -mt_vvadd_riscv_objs = $(patsubst %.S, %.o, $(mt_vvadd_riscv_src)) - -mt_vvadd_host_bin = mt-vvadd.host -$(mt_vvadd_host_bin) : $(mt_vvadd_c_src) - $(HOST_COMP) $^ -o $(mt_vvadd_host_bin) - -mt_vvadd_riscv_bin = mt-vvadd.riscv -$(mt_vvadd_riscv_bin) : $(mt_vvadd_c_objs) $(mt_vvadd_riscv_objs) - $(RISCV_LINK_MT) $(RISCV_LINK_SYSCALL) $(mt_vvadd_c_objs) $(mt_vvadd_riscv_objs) -o $(mt_vvadd_riscv_bin) - -junk += $(mt_vvadd_c_objs) $(mt_vvadd_riscv_objs) \ - $(mt_vvadd_host_bin) $(mt_vvadd_riscv_bin) diff --git a/mt/mt-vvadd/dataset.h b/mt/mt-vvadd/dataset.h deleted file mode 100644 index ce9f936..0000000 --- a/mt/mt-vvadd/dataset.h +++ /dev/null @@ -1,165 +0,0 @@ - -#define DATA_SIZE 1000 - -static data_t input1_data[DATA_SIZE] = -{ - 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, - 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00, - 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00, - 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00, - 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00, - 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00, - 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00, - 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00, - 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 
10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00, - 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00, - 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00, - 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00, - 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00, - 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00, - 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00, - 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00, - 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00, - 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00, - 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00, - 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00, - 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00, - 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00, - 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00, - 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 
7.00, 6.00, 5.00, 10.00, 14.00, 14.00, - 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00, - 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00, - 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00, - 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00, - 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00, - 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00, - 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00, - 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00, - 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00, - 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00, - 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00, - 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00, - 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00, - 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00, - 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 
8.00, 4.00, 4.00, - 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00, - 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00, - 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00, - 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00, - 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00, - 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00, - 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00, - 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00, - 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00, - 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00, - 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00 -}; - -static data_t input2_data[DATA_SIZE] = -{ - 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00, - 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00, - 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00, - 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 
13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00, - 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00, - 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00, - 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00, - 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00, - 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00, - 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00, - 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00, - 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00, - 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00, - 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00, - 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00, - 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00, - 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00, - 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00, - 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 
0.00, 11.00, 17.00, 0.00, - 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00, - 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00, - 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00, - 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00, - 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00, - 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00, - 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00, - 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00, - 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00, - 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00, - 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00, - 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00, - 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00, - 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00, - 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00, - 
12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00, - 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00, - 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00, - 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00, - 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00, - 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00, - 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00, - 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00, - 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00, - 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00, - 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00, - 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00, - 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00, - 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00, - 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00, - 17.00, 15.00, 15.00, 
9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00 -}; - -static data_t verify_data[DATA_SIZE] = -{ - 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00, - 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00, - 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00, - 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00, - 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00, - 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00, - 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00, - 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00, - 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00, - 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00, - 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00, - 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00, - 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00, - 22.00, 14.00, 21.00, 11.00, 
14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00, - 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00, - 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00, - 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00, - 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00, - 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00, - 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00, - 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00, - 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00, - 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00, - 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00, - 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00, - 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00, - 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00, - 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 
21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00, - 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00, - 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00, - 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00, - 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00, - 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00, - 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00, - 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00, - 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00, - 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00, - 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00, - 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00, - 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00, - 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00, - 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 
25.00, 9.00, - 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00, - 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00, - 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00, - 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00, - 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00, - 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00, - 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00, - 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 -}; - diff --git a/mt/mt-vvadd/mt-vvadd.c b/mt/mt-vvadd/mt-vvadd.c deleted file mode 100644 index d5868e4..0000000 --- a/mt/mt-vvadd/mt-vvadd.c +++ /dev/null @@ -1,165 +0,0 @@ -//************************************************************************** -// Vector-vector add benchmark -//-------------------------------------------------------------------------- -// Author : Andrew Waterman -// TA : Christopher Celio -// Student : -// -// This benchmark adds two vectors and writes the results to a -// third vector. The input data (and reference data) should be -// generated using the vvadd_gendata.pl perl script and dumped -// to a file named dataset.h - -// to print out arrays, etc. 
-//#define DEBUG - -//-------------------------------------------------------------------------- -// Includes - -#include -#include -#include - - -//-------------------------------------------------------------------------- -// Input/Reference Data - -typedef float data_t; -#include "dataset.h" - - -//-------------------------------------------------------------------------- -// Basic Utilities and Multi-thread Support - -__thread unsigned long coreid; -unsigned long ncores; - -#include "util.h" - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - - -//-------------------------------------------------------------------------- -// Helper functions - -void printArrayMT( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - -//-------------------------------------------------------------------------- -// vvadd function - -//perform in-place vvadd -void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} - -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // 
***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // -} - -//-------------------------------------------------------------------------- -// Main -// -// all threads start executing thread_entry(). Use their "coreid" to -// differentiate between threads (each thread is running on a separate core). - -void thread_entry(int cid, int nc) -{ - coreid = cid; - ncores = nc; - - // static allocates data in the binary, which is visible to both threads - static data_t results_data[DATA_SIZE]; - - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd - barrier(nc); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); - - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - barrier(nc); - - - // Execute your faster vvadd - barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - -#ifdef DEBUG - printArrayMT("results: ", DATA_SIZE, results_data); - printArrayMT("verify : ", DATA_SIZE, verify_data); -#endif - - // verify - verifyMT(DATA_SIZE, results_data, verify_data); - barrier(nc); - - exit(0); -} - diff --git a/mt/mt-vvadd/vvadd_gendata.pl b/mt/mt-vvadd/vvadd_gendata.pl deleted file mode 100755 index a9fceac..0000000 --- a/mt/mt-vvadd/vvadd_gendata.pl +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================== -# vvadd_gendata.pl -# -# Author : Christopher Batten (cbatten@mit.edu) -# Date : April 29, 2005 -# -(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm; -# -# Simple script which creates an input data set 
and the reference data -# for the vvadd benchmark. -# -ENDMSG - -use strict "vars"; -use warnings; -no warnings("once"); -use Getopt::Long; - -#-------------------------------------------------------------------------- -# Command line processing -#-------------------------------------------------------------------------- - -our %opts; - -sub usage() -{ - - print "\n"; - print " Usage: vvadd_gendata.pl [options] \n"; - print "\n"; - print " Options:\n"; - print " --help print this message\n"; - print " --size size of input data [1000]\n"; - print " --seed random seed [1]\n"; - print "$usageMsg"; - - exit(); -} - -sub processCommandLine() -{ - - $opts{"help"} = 0; - $opts{"size"} = 1000; - $opts{"seed"} = 1; - Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); - $opts{"help"} and usage(); - -} - -#-------------------------------------------------------------------------- -# Helper Functions -#-------------------------------------------------------------------------- - -sub printArray -{ - my $arrayName = $_[0]; - my $arrayRef = $_[1]; - - my $numCols = 20; - my $arrayLen = scalar(@{$arrayRef}); - - print "static data_t ".$arrayName."[DATA_SIZE] = \n"; - print "{\n"; - - if ( $arrayLen <= $numCols ) { - print " "; - for ( my $i = 0; $i < $arrayLen; $i++ ) { - print sprintf("%3.2f",$arrayRef->[$i]); - if ( $i != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - else { - my $numRows = int($arrayLen/$numCols); - for ( my $j = 0; $j < $numRows; $j++ ) { - print " "; - for ( my $i = 0; $i < $numCols; $i++ ) { - my $index = $j*$numCols + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - if ( $arrayLen > ($numRows*$numCols) ) { - print " "; - for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) { - my $index = $numCols*$numRows + $i; - print sprintf("%3.2f",$arrayRef->[$index]); - if ( $index != $arrayLen-1 ) { - print ", "; - } - } - print "\n"; - } - - 
} - - print "};\n\n"; -} - -#-------------------------------------------------------------------------- -# Main -#-------------------------------------------------------------------------- - -sub main() -{ - - processCommandLine(); - srand($opts{"seed"}); - - my @values1; - my @values2; - my @sum; - for ( my $i = 0; $i < $opts{"size"}; $i++ ) { - my $value1 = int(rand(19)); - my $value2 = int(rand(19)); - push( @values1, $value1 ); - push( @values2, $value2 ); - push( @sum, $value1 + $value2 ); - } - - - print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n"; - printArray( "input1_data", \@values1 ); - printArray( "input2_data", \@values2 ); - printArray( "verify_data", \@sum ); - -} - -main(); - diff --git a/mt/vvadd0.c b/mt/vvadd0.c new file mode 100755 index 0000000..4cc66b9 --- /dev/null +++ b/mt/vvadd0.c @@ -0,0 +1,14 @@ +#include "stdlib.h" +#include "dataset.h" + +void __attribute__((noinline)) vvadd(int coreid, int ncores, size_t n, const data_t* x, const data_t* y, data_t* z) +{ + size_t i; + size_t leftover = n % (n / ncores); + for (i = coreid * (n / ncores); i < (coreid + 1) * (n / ncores); i++) { + z[i] = x[i] + y[i]; + } + for (i = (n - leftover) + coreid; i < n; i += ncores) { + z[i] = x[i] + y[i]; + } +} diff --git a/mt/vvadd1.c b/mt/vvadd1.c new file mode 100755 index 0000000..b7e44a7 --- /dev/null +++ b/mt/vvadd1.c @@ -0,0 +1,17 @@ +#include "stdlib.h" +#include "dataset.h" + +void __attribute__((noinline)) vvadd(int coreid, int ncores, size_t n, const data_t* x, const data_t* y, data_t* z) +{ + size_t i; + for (i = coreid*4; i < n; i += 8*ncores) { + z[i] = x[i] + y[i]; + z[i+1] = x[i+1] + y[i+1]; + z[i+2] = x[i+2] + y[i+2]; + z[i+3] = x[i+3] + y[i+3]; + z[i+ncores*4] = x[i+ncores*4] + y[i+ncores*4]; + z[i+ncores*4+1] = x[i+ncores*4+1] + y[i+ncores*4+1]; + z[i+ncores*4+2] = x[i+ncores*4+2] + y[i+ncores*4+2]; + z[i+ncores*4+3] = x[i+ncores*4+3] + y[i+ncores*4+3]; + } +} diff --git a/mt/vvadd2.c b/mt/vvadd2.c new file mode 100644 index 
0000000..937a2e9 --- /dev/null +++ b/mt/vvadd2.c @@ -0,0 +1,11 @@ +#include "stdlib.h" +#include "dataset.h" + +void __attribute__((noinline)) vvadd(int coreid, int ncores, size_t n, const data_t* x, const data_t* y, data_t* z) +{ + size_t i; + for (i = coreid; i < n; i += 2*ncores) { + z[i] = x[i] + y[i]; + z[i+ncores] = x[i+ncores] + y[i+ncores]; + } +} diff --git a/mt/vvadd3.c b/mt/vvadd3.c new file mode 100755 index 0000000..d18d2de --- /dev/null +++ b/mt/vvadd3.c @@ -0,0 +1,22 @@ +#include "stdlib.h" +#include "dataset.h" + +void __attribute__((noinline)) vvadd(int coreid, int ncores, size_t n, const data_t* x, const data_t* y, data_t* z) +{ + data_t* to = &z[coreid * (n / ncores)]; + const data_t* from1 = &x[coreid * (n / ncores)]; + const data_t* from2 = &y[coreid * (n / ncores)]; + size_t count = n / ncores; + size_t c = (count + 7) / 8; + switch(count % 8) { + case 0: do { *to++ = *from1++ + *from2++; + case 7: *to++ = *from1++ + *from2++; + case 6: *to++ = *from1++ + *from2++; + case 5: *to++ = *from1++ + *from2++; + case 4: *to++ = *from1++ + *from2++; + case 3: *to++ = *from1++ + *from2++; + case 2: *to++ = *from1++ + *from2++; + case 1: *to++ = *from1++ + *from2++; + } while(--c > 0); + } +} diff --git a/mt/vvadd4.c b/mt/vvadd4.c new file mode 100644 index 0000000..8f4d43f --- /dev/null +++ b/mt/vvadd4.c @@ -0,0 +1,16 @@ +#include "stdlib.h" +#include "dataset.h" + +//-------------------------------------------------------------------------- +// vvadd function + +void __attribute__((noinline)) vvadd(int coreid, int ncores, size_t n, const data_t* x, const data_t* y, data_t* z) +{ + size_t i; + + // interleave accesses + for (i = coreid; i < n; i+=ncores) + { + z[i] = x[i] + y[i]; + } +}