dhrystone \
spmv \
mt-vvadd \
+ mt-matmul \
#vec-fft \
- #mt-matmul \
#vec-vvadd \
#vec-cmplxmult \
#vec-matmul \
towers \
vvadd \
multiply \
- mm \
spmv \
vec-vvadd \
vec-cmplxmult \
#include "encoding.h"
#endif
// stringify(s) turns s into a string literal. The two-level definition lets
// macros inside s be expanded before '#' is applied.
//
// stats(code, iter) executes `code` once between two samples of rdcycle() and
// rdinstret(), then prints the elapsed cycle count, cycles per iteration and
// cycles per instruction (each ratio with one decimal digit, computed in
// integer arithmetic).
//  - _c and _i start at the negated first sample and the second sample is
//    added afterwards, so they end up holding the elapsed differences.
//  - Only the caller whose `cid` is 0 prints; a variable named `cid` must
//    therefore be in scope wherever stats() is expanded.
//  - `iter` is pasted into `_c/iter` and `10*_c/iter%10` without parentheses,
//    so an argument such as N/N/N expands to `_c/N/N/N`.
//  - The label printed in front of the numbers is the stringified `code`.
+#define stringify_1(s) #s
+#define stringify(s) stringify_1(s)
+#define stats(code, iter) do { \
+ unsigned long _c = -rdcycle(), _i = -rdinstret(); \
+ code; \
+ _c += rdcycle(), _i += rdinstret(); \
+ if (cid == 0) \
+ printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
+ stringify(code), _c, _c/iter, 10*_c/iter%10, _c/_i, 10*_c/_i%10); \
+ } while(0)
+
#endif //__UTIL_H
mt_matmul_c_src = \
mt-matmul.c \
+ matmul.c \
syscalls.c \
mt_matmul_riscv_src = \
+#ifndef __DATASET_H
+#define __DATASET_H
#define ARRAY_SIZE 1024
-
#define DIM_SIZE 32
+typedef double data_t;
+
static data_t input1_data[ARRAY_SIZE] =
{
0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
69, 73, 94, 89
};
+
+#endif //__DATASET_H
--- /dev/null
+#include "dataset.h"
+
+//--------------------------------------------------------------------------
+// multi-threaded, naive version
+//
+// Accumulates the product of two lda x lda matrices into C:
+//   C[i + j*lda] += A[j*lda + k] * B[k*lda + i]   for every i, j, k
+// C is only added to, never cleared, so the caller must pass in a zeroed C.
+//
+//   coreid - index of the calling core; selects the j values it handles
+//   ncores - number of cores calling matmul() on the same arrays (must be > 0)
+//   lda    - dimension of the square matrices
+//
+// The j loop (not the k loop) is strided across the cores. Each element
+// C[i + j*lda] is then read and written by exactly one core. Striding k
+// instead would have every core perform an unsynchronised "+=" on the same
+// element, which loses updates.
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+  int i, j, k;
+
+  for ( i = 0; i < lda; i++ )
+  {
+    for ( j = coreid; j < lda; j += ncores )
+    {
+      for ( k = 0; k < lda; k++ )
+      {
+        C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
+      }
+    }
+  }
+}
//--------------------------------------------------------------------------
// Input/Reference Data
-typedef double data_t;
#include "dataset.h"
-
+
//--------------------------------------------------------------------------
// Basic Utilities and Multi-thread Support
-__thread unsigned long coreid;
-
#include "util.h"
+
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
//--------------------------------------------------------------------------
// matmul function
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
+ extern void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] );
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
//--------------------------------------------------------------------------
// Main
void thread_entry(int cid, int nc)
{
- coreid = cid;
-
- // static allocates data in the binary, which is visible to both threads
 static data_t results_data[ARRAY_SIZE];
-
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
+ // The iteration count is passed as one parenthesised product: stats() pastes
+ // its second argument after a '/' without parentheses, and
+ // DIM_SIZE/DIM_SIZE/DIM_SIZE evaluated on its own is 0.
+ stats(matmul(cid, nc, DIM_SIZE, input1_data, input2_data, results_data); barrier(nc), (DIM_SIZE*DIM_SIZE*DIM_SIZE));
-
- // verify
 int res = verifyDouble(ARRAY_SIZE, results_data, verify_data);
- if (res)
- exit(res);
-
-#if 0
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
#ifdef DEBUG
 printArray("results:", ARRAY_SIZE, results_data);
 printArray("verify :", ARRAY_SIZE, verify_data);
#endif
-
- // verify
- res = verify(ARRAY_SIZE, results_data, verify_data);
- if (res)
- exit(res);
- barrier(nc);
-#endif
- exit(0);
+ exit(res);
}
mt_vvadd_c_src = \
mt-vvadd.c \
+ vvadd.c \
syscalls.c \
mt_vvadd_riscv_src = \
+#ifndef __DATASET_H
+#define __DATASET_H
#define DATA_SIZE 1000
+typedef double data_t;
+
static data_t input1_data[DATA_SIZE] =
{
0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
};
+
+#endif //__DATASET_H
//--------------------------------------------------------------------------
// Input/Reference Data
-typedef double data_t;
#include "dataset.h"
//--------------------------------------------------------------------------
// Basic Utilities and Multi-thread Support
-__thread unsigned long coreid;
-
#include "util.h"
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
//--------------------------------------------------------------------------
// vvadd function
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(int ncores, size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
+extern void __attribute__((noinline)) vvadd(int coreid, int ncores, size_t n, const data_t* x, const data_t* y, data_t* z);
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
-}
//--------------------------------------------------------------------------
// Main
void thread_entry(int cid, int nc)
{
- coreid = cid;
-
 // static allocates data in the binary, which is visible to both threads
 static data_t results_data[DATA_SIZE];
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
+ // First do out-of-place vvadd
 barrier(nc);
- stats(vvadd(nc, DATA_SIZE, results_data, input2_data); barrier(nc));
+ stats(vvadd(cid, nc, DATA_SIZE, input1_data, input2_data, results_data); barrier(nc), DATA_SIZE);
-
- // verify
- int res = verifyDouble(DATA_SIZE, results_data, verify_data);
- if (res)
- exit(res);
-
-#if 0
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
+ if(cid == 0) {
+ // Array dumps are debug-only, matching the in-place pass below
+#ifdef DEBUG
+ printDoubleArray("out-of-place results: ", DATA_SIZE, results_data);
+ printDoubleArray("out-of-place verify : ", DATA_SIZE, verify_data);
+#endif
+ int res = verifyDouble(DATA_SIZE, results_data, verify_data);
+ if(res) exit(res);
 }
- barrier(nc);
- // Execute your faster vvadd
+ // Second do in-place vvadd
+ // Copying input
+ size_t i;
+ if(cid == 0) {
+ for (i = 0; i < DATA_SIZE; i++)
+ results_data[i] = input1_data[i];
+ }
 barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
+ stats(vvadd(cid, nc, DATA_SIZE, results_data, input2_data, results_data); barrier(nc), DATA_SIZE);
+
+ if(cid == 0) {
#ifdef DEBUG
- printDoubleArray("results: ", DATA_SIZE, results_data);
- printDoubleArray("verify : ", DATA_SIZE, verify_data);
+ printDoubleArray("in-place results: ", DATA_SIZE, results_data);
+ printDoubleArray("in-place verify : ", DATA_SIZE, verify_data);
#endif
+ int res = verifyDouble(DATA_SIZE, results_data, verify_data);
+ if(res) exit(res);
+ }
- // verify
- res = verifyDouble(DATA_SIZE, results_data, verify_data);
- if (res)
- exit(res);
 barrier(nc);
-#endif
-
 exit(0);
}
--- /dev/null
+#include "stdlib.h"
+#include "dataset.h"
+
+//--------------------------------------------------------------------------
+// vvadd function
+
+// Writes z[i] = x[i] + y[i] for the indices coreid, coreid + ncores,
+// coreid + 2*ncores, ... that are below n. Indices belonging to other cores
+// are left untouched.
+void __attribute__((noinline)) vvadd(int coreid, int ncores, size_t n, const data_t* x, const data_t* y, data_t* z)
+{
+  size_t idx = coreid;
+
+  // walk this core's share of the index range, one stride of ncores at a time
+  while (idx < n)
+  {
+    z[idx] = x[idx] + y[idx];
+    idx += ncores;
+  }
+}
-#=======================================================================
+#=======================================================================
# UCB VLSI FLOW: Makefile for riscv-bmarks/mt
#-----------------------------------------------------------------------
# Henry Cook (hcook@cs.berkeley.edu)
# Sources
#--------------------------------------------------------------------
-bmarks = \
-ab_matmul\
-ab_vvadd\
+bmarks_matmul = \
ad_matmul\
-ad_vvadd\
ae_matmul\
-ae_vvadd\
af_matmul\
-af_vvadd\
ag_matmul\
-ag_vvadd\
ai_matmul\
-ai_vvadd\
-aj_vvadd\
ak_matmul\
-ak_vvadd\
al_matmul\
-al_vvadd\
am_matmul\
-am_vvadd\
an_matmul\
ap_matmul\
-ap_vvadd\
aq_matmul\
-aq_vvadd\
ar_matmul\
-ar_vvadd\
-as_matmul\
-as_vvadd\
at_matmul\
-at_vvadd\
av_matmul\
-av_vvadd\
ay_matmul\
-ay_vvadd\
az_matmul\
-az_vvadd\
-ba_matmul\
-ba_vvadd\
bb_matmul\
-bb_vvadd\
bc_matmul\
-bc_vvadd\
-be_matmul\
-be_vvadd\
bf_matmul\
-bf_vvadd\
bh_matmul\
-bh_vvadd\
bj_matmul\
-bj_vvadd\
bk_matmul\
-bk_vvadd\
bm_matmul\
-bm_vvadd\
-bn_matmul\
-bn_vvadd\
bo_matmul\
-bo_vvadd\
-bp_matmul\
-bp_vvadd\
br_matmul\
-br_vvadd\
bs_matmul\
-bs_vvadd\
-bt_matmul\
-bt_vvadd\
+ce_matmul\
+cf_matmul\
+cg_matmul\
+ci_matmul\
+ck_matmul\
+cl_matmul\
+cm_matmul\
+cs_matmul\
+cv_matmul\
+cy_matmul\
+dc_matmul\
+df_matmul\
+dm_matmul\
+do_matmul\
+dr_matmul\
+ds_matmul\
+du_matmul\
+dv_matmul\
+
+bmarks_vvadd = \
+vvadd0\
+vvadd1\
+vvadd2\
+vvadd3\
+vvadd4\
+
+bmarks = $(bmarks_vvadd) $(bmarks_matmul)
#--------------------------------------------------------------------
# Build rules
RISCV_OBJDUMP = riscv-objdump --disassemble-all --disassemble-zeroes --section=.text --section=.text.startup --section=.data
RISCV_SIM = spike -p2
-VPATH += $(addprefix $(bmarkdir)/, $(bmarks))
-VPATH += $(common)
+VPATH += $(common) $(common)/../mt-matmul $(common)/../mt-vvadd
-incs += -I. -I$(bmarkdir)/../env -I$(common) $(addprefix -I$(bmarkdir)/, $(bmarks))
+incs += -I. -I$(bmarkdir)/../env -I$(common) -I$(common)/../mt-matmul -I$(common)/../mt-vvadd
objs :=
#include $(patsubst %, $(bmarkdir)/%/bmark.mk, $(bmarks))
#------------------------------------------------------------
bmarks_riscv_obj = $(addsuffix .o, $(bmarks))
-bmarks_riscv_bin = $(addsuffix .riscv, $(bmarks))
+bmarks_riscv_matmul_bin = $(addsuffix .riscv, $(bmarks_matmul))
+bmarks_riscv_vvadd_bin = $(addsuffix .riscv, $(bmarks_vvadd))
bmarks_riscv_dump = $(addsuffix .riscv.dump, $(bmarks))
bmarks_riscv_hex = $(addsuffix .riscv.hex, $(bmarks))
bmarks_riscv_out = $(addsuffix .riscv.out, $(bmarks))
+bmarks_riscv_bin = $(bmarks_riscv_matmul_bin) $(bmarks_riscv_vvadd_bin)
bmarks_defs = -DPREALLOCATE=1 -DHOST_DEBUG=0
bmarks_cycles = 80000
%.hex: %
elf2hex 16 32768 $< > $@
-$(bmarks_riscv_bin): %.riscv: %.o syscalls.o crt.o
- $(RISCV_LINK) $< syscalls.o crt.o $(RISCV_LINK_OPTS) -o $@
+$(bmarks_riscv_vvadd_bin): %.riscv: %.o mt-vvadd.o syscalls.o crt.o
+ $(RISCV_LINK) $< mt-vvadd.o syscalls.o crt.o $(RISCV_LINK_OPTS) -o $@
+
+$(bmarks_riscv_matmul_bin): %.riscv: %.o mt-matmul.o syscalls.o crt.o
+ $(RISCV_LINK) $< mt-matmul.o syscalls.o crt.o $(RISCV_LINK_OPTS) -o $@
$(bmarks_riscv_dump): %.riscv.dump: %.riscv
$(RISCV_OBJDUMP) $< > $@
run-riscv: $(bmarks_riscv_out)
echo; perl -ne 'print " [$$1] $$ARGV \t$$2\n" if /\*{3}(.{8})\*{3}(.*)/' \
-junk += $(bmarks_riscv_bin) $(bmarks_riscv_dump) $(bmarks_riscv_hex) $(bmarks_riscv_out)
+junk += $(bmarks_riscv_bin) $(bmarks_riscv_dump) $(bmarks_riscv_hex) $(bmarks_riscv_out) $(bmarks_riscv_obj)
#------------------------------------------------------------
# Clean up
+# Also removes the shared objects linked into every benchmark binary; the
+# names must match the prerequisites of the link rules (syscalls.o, not syscall.o).
clean:
- rm -rf $(objs) $(junk)
+	rm -rf $(objs) $(junk) syscalls.o crt.o mt-matmul.o mt-vvadd.o
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
- // I think I've got a way for this to not need the "shared" state to work nicely, so no MSI version
- int i, j, k, lda_over_2;
- lda_over_2 = lda/2;
-
- if(coreid > 1)
- return;
- // left side of c
- if(coreid == 0)
- {
- // first half of topleft corner
- for(i = 0; i < lda_over_2; i++) {
- for(j = 0; j < lda_over_2; j++) {
- for(k = 0; k < lda_over_2; k++) {
- C[i*lda + j] += A[i*lda + k]*B[k*lda + j];
- }
- }
- }
- // second half of topleft corner
- for(i = 0; i < lda_over_2; i++) {
- for(j = 0; j < lda_over_2; j++) {
- for(k = lda_over_2; k < lda; k++) {
- C[i*lda + j] += A[i*lda + k]*B[k*lda + j];
- }
- }
- }
- // second half of bottomleft corner
- for(i = lda_over_2; i < lda; i++) {
- for(j = 0; j < lda_over_2; j++) {
- for(k = lda_over_2; k < lda; k++) {
- C[i*lda + j] += A[i*lda + k]*B[k*lda + j];
- }
- }
- }
- // first half of bottomleft corner
- for(i = lda_over_2; i < lda; i++) {
- for(j = 0; j < lda_over_2; j++) {
- for(k = 0; k < lda_over_2; k++) {
- C[i*lda + j] += A[i*lda + k]*B[k*lda + j];
- }
- }
- }
- }
- else // coreid == 1
- {
- // first half of bottomright corner
- for(i = lda_over_2; i < lda; i++) {
- for(j = lda_over_2; j < lda; j++) {
- for(k = 0; k < lda_over_2; k++) {
- C[i*lda + j] += A[i*lda + k]*B[k*lda + j];
- }
- }
- }
- // second half of bottomright corner
- for(i = lda_over_2; i < lda; i++) {
- for(j = lda_over_2; j < lda; j++) {
- for(k = lda_over_2; k < lda; k++) {
- C[i*lda + j] += A[i*lda + k]*B[k*lda + j];
- }
- }
- }
- // second half of topright corner
- for(i = 0; i < lda_over_2; i++) {
- for(j = lda_over_2; j < lda; j++) {
- for(k = lda_over_2; k < lda; k++) {
- C[i*lda + j] += A[i*lda + k]*B[k*lda + j];
- }
- }
- }
- // first half of topright corner
- for(i = 0; i < lda_over_2; i++) {
- for(j = lda_over_2; j < lda; j++) {
- for(k = 0; k < lda_over_2; k++) {
- C[i*lda + j] += A[i*lda + k]*B[k*lda + j];
- }
- }
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
-  # Multiply two matrices given as references to arrays of row arrays and
-  # return a reference to the product.  Only the row count of the first
-  # matrix, its column count and the column count of the second are used
-  # as loop bounds.
-  my ( $m1, $m2 ) = @_;
-  my ( $m1rows, $m1cols ) = matdim($m1);
-  my ( undef,   $m2cols ) = matdim($m2);
-
-  my $result = [ ];
-
-  foreach my $row ( range($m1rows) ) {
-    foreach my $col ( range($m2cols) ) {
-      # accumulate straight into the result cell, one term per inner index
-      $result->[$row][$col] += $m1->[$row][$_] * $m2->[$_][$col]
-        foreach range($m1cols);
-    }
-  }
-
-  return $result;
-}
-
-sub range { my ($count) = @_; return 0 .. ( $count - 1 ); }  # indices 0 .. count-1
-
-
-sub veclen {
-  # Return the element count of the array behind the given reference;
-  # die when the argument is not an ARRAY reference.
-  my ($ary_ref) = @_;
-  my $type = ref $ary_ref;
-  die "$type is bad array ref for $ary_ref" unless $type eq "ARRAY";
-  return scalar( @{$ary_ref} );
-}
-
-sub matdim {
-  # Return (rows, cols) of a matrix stored as a reference to an array of
-  # row arrays; the column count is taken from the first row.
-  my ($matrix) = @_;
-  return ( veclen($matrix), veclen( $matrix->[0] ) );
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
-  processCommandLine();
-  srand($opts{"seed"});
-
-  my $dim = $opts{"size"};
-
-  # Fill both square input matrices with random integers in 0..3.  The two
-  # draws per cell stay interleaved (first matrix, then second) because the
-  # draw order determines which values a given seed produces.
-  my $mat_values1;
-  my $mat_values2;
-  foreach my $i ( 0 .. $dim - 1 ) {
-    foreach my $j ( 0 .. $dim - 1 ) {
-      $mat_values1->[$i][$j] = int(rand(4));
-      $mat_values2->[$i][$j] = int(rand(4));
-    }
-  }
-
-  # reference product
-  my $mat_results = mmult( $mat_values1, $mat_values2 );
-
-  # Flatten each matrix row by row into a one-dimensional list.
-  my ( @values1, @values2, @results );
-  foreach my $i ( 0 .. $dim - 1 ) {
-    push( @values1, @{ $mat_values1->[$i] }[ 0 .. $dim - 1 ] );
-    push( @values2, @{ $mat_values2->[$i] }[ 0 .. $dim - 1 ] );
-    push( @results, @{ $mat_results->[$i] }[ 0 .. $dim - 1 ] );
-  }
-
-  # size macros first, then the three arrays
-  print "\n\#define ARRAY_SIZE ".($dim*$dim)." \n\n";
-  print "\n\#define DIM_SIZE ".$dim." \n\n";
-
-  printArray( "input1_data", \@values1 );
-  printArray( "input2_data", \@values2 );
-  printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third 2-D array. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s /* stringize the argument exactly as spelled */
-#define stringify(s) stringify_1(s) /* extra level so a macro argument is expanded before it is stringized */
-#define stats(code) do { /* run `code` once and have core 0 print cycle/instruction statistics for it */ \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); /* start readings stored negated so the += below leaves (end - start) */ \
- code; \
- _c += rdcycle(), _i += rdinstret(); /* _c and _i now hold the counter deltas across `code` */ \
- if (coreid == 0) /* only core 0 reports */ \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); /* each ratio is printed as integer part plus one decimal digit; "iter" is _c divided by DIM_SIZE three times */ \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-// Print `name` followed by the first n elements of arr, each cast to long.
-// Only core 0 produces output; every other core returns without printing.
-void printArrayMT( char name[], int n, data_t arr[] )
-{
-  if (coreid == 0)
-  {
-    int idx = 0;
-
-    printf( " %10s :", name );
-    while ( idx < n )
-    {
-      printf( " %3ld ", (long) arr[idx] );
-      idx++;
-    }
-    printf( "\n" );
-  }
-}
-
-// Compare the first n elements of test[] against correct[] on core 0.
-// On the first mismatch, print the index and both values (cast to long)
-// and terminate with exit(-1).  All other cores return immediately.
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
-  if (coreid != 0)
-    return;
-
-  size_t i;
-  for (i = 0; i < n; i++)
-  {
-    if (test[i] != correct[i])
-    {
-      // %d consumes an int; the index is a size_t, so convert it explicitly
-      // instead of passing an argument whose type does not match the format.
-      printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
-             (int)i, (long)test[i], (int)i, (long)correct[i]);
-      exit(-1);
-    }
-  }
-
-  return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version: core 0 alone accumulates the lda x lda
-// product into C (C is added to, not overwritten); other cores do nothing.
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-  int col, row, inner;
-
-  // coreid is unsigned, so "not zero" is the same test as "greater than zero"
-  if (coreid != 0)
-    return;
-
-  for ( col = 0; col < lda; col++ )
-  {
-    for ( row = 0; row < lda; row++ )
-    {
-      const data_t *a_row = &A[row*lda];
-      data_t *out = &C[col + row*lda];
-
-      // write through to C on every term, exactly one update per inner index
-      for ( inner = 0; inner < lda; inner++ )
-        *out += a_row[inner] * B[inner*lda + col];
-    }
-  }
-}
-
-
-
-// Accumulate into C, for rows [i0,i1) and columns [j0,j1), the partial dot
-// products taken over inner indices [k0,k1).
-static void matmul_tile(const int lda, const data_t A[], const data_t B[], data_t C[],
-                        int i0, int i1, int j0, int j1, int k0, int k1)
-{
-  int i, j, k;
-
-  for (i = i0; i < i1; i++) {
-    for (j = j0; j < j1; j++) {
-      for (k = k0; k < k1; k++) {
-        C[i*lda + j] += A[i*lda + k]*B[k*lda + j];
-      }
-    }
-  }
-}
-
-// Multi-threaded matmul: C += A * B for lda x lda matrices.
-//
-// Each core owns a contiguous band of columns of C, so no element of C is
-// written by two cores.  The band is derived from ncores instead of being
-// fixed at "left half / right half": with two cores the bands, the tile
-// order and the per-element summation order are the same as before, while
-// a single core now computes the whole matrix (previously the right half
-// was never produced) and additional cores share the work.
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-  // nothing to do for an empty matrix, an unset core count, or a core id
-  // outside the advertised range
-  if (lda <= 0 || ncores == 0 || coreid >= ncores)
-    return;
-
-  const int lda_over_2 = lda/2;
-
-  // this core's column band [j_lo, j_hi)
-  const int j_lo = (int)(coreid * (unsigned long)lda / ncores);
-  const int j_hi = (int)((coreid + 1) * (unsigned long)lda / ncores);
-
-  if (coreid % 2 == 0)
-  {
-    // even cores: top rows first, then bottom rows; the second pair starts
-    // with the k-half that the previous tile finished on
-    matmul_tile(lda, A, B, C, 0,          lda_over_2, j_lo, j_hi, 0,          lda_over_2);
-    matmul_tile(lda, A, B, C, 0,          lda_over_2, j_lo, j_hi, lda_over_2, lda);
-    matmul_tile(lda, A, B, C, lda_over_2, lda,        j_lo, j_hi, lda_over_2, lda);
-    matmul_tile(lda, A, B, C, lda_over_2, lda,        j_lo, j_hi, 0,          lda_over_2);
-  }
-  else
-  {
-    // odd cores: mirror image, bottom rows first
-    matmul_tile(lda, A, B, C, lda_over_2, lda,        j_lo, j_hi, 0,          lda_over_2);
-    matmul_tile(lda, A, B, C, lda_over_2, lda,        j_lo, j_hi, lda_over_2, lda);
-    matmul_tile(lda, A, B, C, 0,          lda_over_2, j_lo, j_hi, lda_over_2, lda);
-    matmul_tile(lda, A, B, C, 0,          lda_over_2, j_lo, j_hi, 0,          lda_over_2);
-  }
-
-  return;
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc) // per-core entry point: time matmul(), let core 0 verify the result, then exit
-{
- coreid = cid; // per-thread variable: each core records its own id
- ncores = nc; // file-scope core count, stored by every core that enters
-
- // results_data has static storage: one zero-initialized array in the binary that all cores accumulate into
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc); // NOTE(review): barrier() is defined outside this view; presumably holds each core until all nc have arrived
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // the trailing barrier is part of the timed code
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify: only core 0 compares; a mismatch ends the run with exit(-1)
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0); // reached by every core that gets past the final barrier
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s /* stringize the argument exactly as spelled */
-#define stringify(s) stringify_1(s) /* extra level so a macro argument is expanded before it is stringized */
-#define stats(code) do { /* run `code` once and have core 0 print cycle/instruction statistics for it */ \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); /* start readings stored negated so the += below leaves (end - start) */ \
- code; \
- _c += rdcycle(), _i += rdinstret(); /* _c and _i now hold the counter deltas across `code` */ \
- if (coreid == 0) /* only core 0 reports */ \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); /* each ratio is printed as integer part plus one decimal digit; "iter" is _c divided by DATA_SIZE */ \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-// Print `name` followed by the first n elements of arr, each cast to long.
-// Only core 0 produces output; every other core returns without printing.
-void printArrayMT( char name[], int n, data_t arr[] )
-{
-  if (coreid == 0)
-  {
-    int idx = 0;
-
-    printf( " %10s :", name );
-    while ( idx < n )
-    {
-      printf( " %4ld ", (long) arr[idx] );
-      idx++;
-    }
-    printf( "\n" );
-  }
-}
-
-// Compare the first n elements of test[] against correct[] on core 0.
-// On the first mismatch, print the index and both values (cast to long)
-// and terminate with exit(-1).  All other cores return immediately.
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
-  if (coreid != 0)
-    return;
-
-  size_t i;
-  for (i = 0; i < n; i++)
-  {
-    if (test[i] != correct[i])
-    {
-      // %d consumes an int; the index is a size_t, so convert it explicitly
-      // instead of passing an argument whose type does not match the format.
-      printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
-             (int)i, (long) test[i], (int)i, (long)correct[i]);
-      exit(-1);
-    }
-  }
-
-  return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-// In-place vector add, x[i] = x[i] + y[i], with the elements dealt out to
-// the cores round-robin: this core starts at index coreid and advances by
-// ncores each step.
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
-  size_t idx = coreid;
-
-  while (idx < n)
-  {
-    x[idx] += y[idx];
-    idx += ncores;
-  }
-}
-
-// In-place vector add where each core handles one contiguous slice of the
-// index range: [coreid*n/ncores, (coreid+1)*n/ncores).
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
-  const size_t first = coreid*n/ncores;
-  const size_t limit = (coreid+1)*n/ncores;
-  size_t pos;
-
-  for (pos = first; pos < limit; ++pos)
-    x[pos] += y[pos];
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc) // per-core entry point: time vvadd() and vvadd_opt(), verifying after each, then exit
-{
- coreid = cid; // per-thread variable: each core records its own id
- ncores = nc; // file-scope core count, stored by every core that enters
-
- // results_data has static storage: a single array in the binary that all cores operate on
- static data_t results_data[DATA_SIZE];
-
- // the vvadd is performed in place and run twice, so the first input is
- // copied into the results array before each run and the original input
- // data stays untouched
-
- size_t i;
- if (coreid == 0) // core 0 alone performs the copy
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc); // NOTE(review): barrier() is defined outside this view; presumably holds each core until all nc have arrived, i.e. until the copy above is done
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // the trailing barrier is part of the timed code
-
-
- // verify: only core 0 compares; a mismatch ends the run with exit(-1)
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial (core 0 only, after it has verified)
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc); // the other cores wait here while core 0 restores the input
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); // the trailing barrier is part of the timed code
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify the second run the same way as the first
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0); // reached by every core that gets past the final barrier
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+//--------------------------------------------------------------------------
+// multi-threaded, unrolled matmul
+//
+// Accumulates the product of A and B into C, treating all three arrays as
+// row-major matrices with 32 elements per row. C is only added to, never
+// cleared, so the caller decides what C holds on entry.
+//
+// The rows of C are split into contiguous bands of lda/ncores rows and this
+// call processes band number coreid.
+//
+//   coreid  selects which band of rows this call processes
+//   ncores  number of bands; also the argument handed to barrier()
+//   lda     matrix dimension, used only to size the bands
+//   A, B    input matrices, read only
+//   C       result matrix, updated in place
+//
+// Limitations: the row stride and the k/i loop bounds are hard-coded to 32,
+// so the indexing is only correct when lda == 32. The band size uses integer
+// division, so when lda is not a multiple of ncores the last lda % ncores
+// rows are not processed by any band.
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+  int i, k;
+  int j = coreid*(lda/ncores);          // first row of this band
+  int jend = (coreid+1)*(lda/ncores);   // one past the last row of this band
+  for ( ; j < jend; j++ )
+  {
+    int j32 = j << 5;                   // offset of row j (32 elements per row)
+    data_t* Cj32 = C + j32;
+    for ( k = 0; k < 32; k+=2 )         // two values of k per pass
+    {
+      data_t Aj32k = A[k + j32];
+      data_t Aj32k2 = A[k + 1 + j32];
+      // Rows k and k+1 of B. B is const-qualified and these are only read
+      // through, so they are pointers to const.
+      const data_t* Bk32 = B + (k << 5);
+      const data_t* Bk322 = Bk32 + 32;
+      for ( i = 0; i < 32; i+=4 )       // four columns of C per pass
+      {
+        Cj32[i] += Aj32k * Bk32 [i];
+        Cj32[i] += Aj32k2 * Bk322 [i];
+        Cj32[i+1] += Aj32k * Bk32 [i+1];
+        Cj32[i+1] += Aj32k2 * Bk322[i+1];
+        Cj32[i+2] += Aj32k * Bk32 [i+2];
+        Cj32[i+2] += Aj32k2 * Bk322[i+2];
+        Cj32[i+3] += Aj32k * Bk32 [i+3];
+        Cj32[i+3] += Aj32k2 * Bk322[i+3];
+      }
+      // Every band has the same number of (j, k) passes, so each caller
+      // reaches this barrier the same number of times.
+      // NOTE(review): bands with different coreid write different rows of C,
+      // so it is not obvious what this per-pass barrier protects - confirm
+      // it is intentional before moving or removing it.
+      barrier(ncores);
+    }
+  }
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
- int i, j=0, k, jend=16;
- if (coreid != 0) {
- j = jend;
- jend = jend << 1;
- }
- for ( ; j < jend; j++ )
- {
- int j32 = j << 5;
- data_t* Cj32 = C + j32;
- for ( k = 0; k < 32; k+=2 )
- {
- data_t Aj32k = A[k + j32];
- data_t Aj32k2 = A[k + 1 + j32];
- data_t* Bk32 = B + (k << 5);
- data_t* Bk322 = Bk32 + 32;
- for ( i = 0; i < 32; i+=4 )
- {
- Cj32[i] += Aj32k * Bk32 [i];
- Cj32[i] += Aj32k2 * Bk322 [i];
- Cj32[i+1] += Aj32k * Bk32 [i+1];
- Cj32[i+1] += Aj32k2 * Bk322[i+1];
- Cj32[i+2] += Aj32k * Bk32 [i+2];
- Cj32[i+2] += Aj32k2 * Bk322[i+2];
- Cj32[i+3] += Aj32k * Bk32 [i+3];
- Cj32[i+3] += Aj32k2 * Bk322[i+3];
- }
- }
- }
-
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
- int i, j=0, k, jend=16;
- if (coreid != 0) {
- j = jend;
- jend = jend << 1;
- }
- for ( ; j < jend; j++ )
- {
- int j32 = j << 5;
- data_t* Cj32 = C + j32;
- for ( k = 0; k < 32; k+=2 )
- {
- data_t Aj32k = A[k + j32];
- data_t Aj32k2 = A[k + 1 + j32];
- data_t* Bk32 = B + (k << 5);
- data_t* Bk322 = Bk32 + 32;
- for ( i = 0; i < 32; i+=4 )
- {
- Cj32[i] += Aj32k * Bk32 [i];
- Cj32[i] += Aj32k2 * Bk322 [i];
- Cj32[i+1] += Aj32k * Bk32 [i+1];
- Cj32[i+1] += Aj32k2 * Bk322[i+1];
- Cj32[i+2] += Aj32k * Bk32 [i+2];
- Cj32[i+2] += Aj32k2 * Bk322[i+2];
- Cj32[i+3] += Aj32k * Bk32 [i+3];
- Cj32[i+3] += Aj32k2 * Bk322[i+3];
- }
- }
- }
-
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- size_t i;
- size_t m = n/2;
- if (coreid == 0) {
- for (i = 0; i < m; i++) {
- x[i] = x[i] + y[i];
- }
- } else {
- for (i = m; i < n; i++) {
- x[i] = x[i] + y[i];
- }
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+ // Multi-core matrix multiply; each core handles its own contiguous range of j.
+ // Result layout: C[i + j*lda] = sum over k of A[j*lda + k] * B[k*lda + i].
+ // C is assigned (not accumulated), so its previous contents are overwritten.
+ // Preconditions visible in the code below (none are checked at run time):
+ //   - lda*lda <= 1024, the fixed size of the BB scratch buffer;
+ //   - lda is a multiple of 8 (k is unrolled by 8, i advances by 4);
+ //   - lda is a multiple of ncores; lda/ncores truncates, so any remainder
+ //     rows/columns would be left unprocessed.
+ // Original template note: feel free to make a separate function for MI and MSI versions.
+ data_t *b1;
+ data_t *b2;
+ data_t *b3;
+ data_t *b4;
+ data_t c1;
+ data_t c2;
+ data_t c3;
+ data_t c4;
+ data_t a1;
+ data_t a2;
+ data_t a3;
+ data_t a4;
+ data_t a5;
+ data_t a6;
+ data_t a7;
+ data_t a8;
+ int i, j, k;
+ static data_t BB[1024]; // transposed copy of B; static, so one instance persists across calls (presumably shared by all cores -- confirm)
+ // Step 1: build BB as the transpose of B (BB[i*lda + k] = B[k*lda + i]) so the
+ // dot-product loop below reads both A and BB at consecutive k indices.
+ // Each core fills only the i-range it owns, yet later reads every row of BB.
+ // barrier() is defined elsewhere; presumably a rendezvous of all ncores cores -- TODO confirm.
+ for ( k = 0; k < lda; k++) {
+ for ( i = coreid*(lda/ncores); i < (coreid+1)*(lda/ncores); i++ ) {
+ BB[i*lda + k] = B[k*lda + i];
+ }
+ barrier(ncores); // called once per k; every core runs the same lda iterations, so call counts match
+ }
+ // Step 2: for each group of four i values, compute four outputs per j owned by this core.
+ for ( i = 0; i < lda; i+=4 ) {
+ for ( j = coreid*(lda/ncores); j < (coreid+1)*(lda/ncores); j++ ) {
+ c1 = 0; c2 = 0; c3 = 0; c4 = 0;
+ b1 = &BB[(i+0)*lda];
+ b2 = &BB[(i+1)*lda];
+ b3 = &BB[(i+2)*lda];
+ b4 = &BB[(i+3)*lda];
+ for ( k = 0; k < lda; k+=8 ) {
+ // load eight consecutive elements of A for this j once; reused by all four accumulators
+ a1 = A[j*lda + k+0];
+ a2 = A[j*lda + k+1];
+ a3 = A[j*lda + k+2];
+ a4 = A[j*lda + k+3];
+ a5 = A[j*lda + k+4];
+ a6 = A[j*lda + k+5];
+ a7 = A[j*lda + k+6];
+ a8 = A[j*lda + k+7];
+ // c1: partial dot product against BB row i+0
+ c1 += a1 * b1[k+0];
+ c1 += a2 * b1[k+1];
+ c1 += a3 * b1[k+2];
+ c1 += a4 * b1[k+3];
+ c1 += a5 * b1[k+4];
+ c1 += a6 * b1[k+5];
+ c1 += a7 * b1[k+6];
+ c1 += a8 * b1[k+7];
+ // c2: partial dot product against BB row i+1
+ c2 += a1 * b2[k+0];
+ c2 += a2 * b2[k+1];
+ c2 += a3 * b2[k+2];
+ c2 += a4 * b2[k+3];
+ c2 += a5 * b2[k+4];
+ c2 += a6 * b2[k+5];
+ c2 += a7 * b2[k+6];
+ c2 += a8 * b2[k+7];
+ // c3: partial dot product against BB row i+2
+ c3 += a1 * b3[k+0];
+ c3 += a2 * b3[k+1];
+ c3 += a3 * b3[k+2];
+ c3 += a4 * b3[k+3];
+ c3 += a5 * b3[k+4];
+ c3 += a6 * b3[k+5];
+ c3 += a7 * b3[k+6];
+ c3 += a8 * b3[k+7];
+ // c4: partial dot product against BB row i+3
+ c4 += a1 * b4[k+0];
+ c4 += a2 * b4[k+1];
+ c4 += a3 * b4[k+2];
+ c4 += a4 * b4[k+3];
+ c4 += a5 * b4[k+4];
+ c4 += a6 * b4[k+5];
+ c4 += a7 * b4[k+6];
+ c4 += a8 * b4[k+7];
+
+
+ }
+ C[i+0 + j*lda] = c1;
+ C[i+1 + j*lda] = c2;
+ C[i+2 + j*lda] = c3;
+ C[i+3 + j*lda] = c4;
+ barrier(ncores); // once per (i, j) step; each core runs lda/ncores j-iterations per i, so call counts match
+ }
+ }
+ // NOTE(review): each core stores to distinct j entries of C, so the per-step barriers may be a performance choice -- confirm.
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-
-
- data_t *b1;
- data_t *b2;
- data_t *b3;
- data_t *b4;
- data_t c1;
- data_t c2;
- data_t c3;
- data_t c4;
- data_t a1;
- data_t a2;
- data_t a3;
- data_t a4;
- data_t a5;
- data_t a6;
- data_t a7;
- data_t a8;
- int i, j, k;
- static data_t BB[1024];
-
-
-
- //transpose B
- if (coreid == 0 | coreid == 1) {
- for ( k = 0; k < lda; k++) {
- for ( i = coreid*(lda/2); i < (coreid+1)*(lda/2); i++ ) {
- BB[i*lda + k] = B[k*lda + i];
- }
- }
- }
- barrier(ncores);
-
- for ( i = 0; i < lda; i+=4 ) {
- for ( j = coreid*(lda/ncores); j < (coreid+1)*(lda/ncores); j++ ) {
- c1 = 0; c2 = 0; c3 = 0; c4 = 0;
- b1 = &BB[(i+0)*lda];
- b2 = &BB[(i+1)*lda];
- b3 = &BB[(i+2)*lda];
- b4 = &BB[(i+3)*lda];
- for ( k = 0; k < lda; k+=8 ) {
-
- a1 = A[j*lda + k+0];
- a2 = A[j*lda + k+1];
- a3 = A[j*lda + k+2];
- a4 = A[j*lda + k+3];
- a5 = A[j*lda + k+4];
- a6 = A[j*lda + k+5];
- a7 = A[j*lda + k+6];
- a8 = A[j*lda + k+7];
-
- c1 += a1 * b1[k+0];
- c1 += a2 * b1[k+1];
- c1 += a3 * b1[k+2];
- c1 += a4 * b1[k+3];
- c1 += a5 * b1[k+4];
- c1 += a6 * b1[k+5];
- c1 += a7 * b1[k+6];
- c1 += a8 * b1[k+7];
-
- c2 += a1 * b2[k+0];
- c2 += a2 * b2[k+1];
- c2 += a3 * b2[k+2];
- c2 += a4 * b2[k+3];
- c2 += a5 * b2[k+4];
- c2 += a6 * b2[k+5];
- c2 += a7 * b2[k+6];
- c2 += a8 * b2[k+7];
-
- c3 += a1 * b3[k+0];
- c3 += a2 * b3[k+1];
- c3 += a3 * b3[k+2];
- c3 += a4 * b3[k+3];
- c3 += a5 * b3[k+4];
- c3 += a6 * b3[k+5];
- c3 += a7 * b3[k+6];
- c3 += a8 * b3[k+7];
-
- c4 += a1 * b4[k+0];
- c4 += a2 * b4[k+1];
- c4 += a3 * b4[k+2];
- c4 += a4 * b4[k+3];
- c4 += a5 * b4[k+4];
- c4 += a6 * b4[k+5];
- c4 += a7 * b4[k+6];
- c4 += a8 * b4[k+7];
-
-
- }
- C[i+0 + j*lda] = c1;
- C[i+1 + j*lda] = c2;
- C[i+2 + j*lda] = c3;
- C[i+3 + j*lda] = c4;
- }
- }
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-/*
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-*/
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- data_t a1;
- data_t a2;
- data_t a3;
- data_t a4;
- data_t a5;
- data_t a6;
- data_t a7;
- data_t a8;
- data_t *b1;
- data_t *b2;
- data_t *b3;
- data_t *b4;
- data_t *b5;
- data_t *b6;
- data_t *b7;
- data_t *b8;
- data_t c1;
- data_t c2;
- data_t c3;
- data_t c4;
- data_t c5;
- data_t c6;
- data_t c7;
- data_t c8;
- int i, j, k;
- int start, end;
- static data_t BB[1024];
-
-
- //transpose B
- if (coreid == 0 | coreid == 1 ) {
- for ( k = 0; k < lda; k++) {
- for ( i = coreid*(lda/2); i < (coreid+1)*(lda/2); i++ ) {
- BB[i*lda + k] = B[k*lda + i];
- }
- }
- }
- barrier(nc);
-
- for ( int x = 0; x < ncores; x++) {
- //split the i values into two chunks so the threads don't interfere on the B loads
- //this could be generalized if needed, but I won't bother since it would be tricky
- //and we already know the size and numthreads
- start = coreid == x ? 0 : 16;
- end = coreid == x ? 16 : 32;
- for ( i = start; i < end; i+=8 ) {
- for ( j = coreid*(lda/ncores); j < (coreid+1)*(lda/ncores); j++ ) {
- c1=0;c2=0;c3=0;c4=0;c5=0;c6=0;c7=0;c8=0;
- b1 = &BB[(i+0)*lda];
- b2 = &BB[(i+1)*lda];
- b3 = &BB[(i+2)*lda];
- b4 = &BB[(i+3)*lda];
- b5 = &BB[(i+4)*lda];
- b6 = &BB[(i+5)*lda];
- b7 = &BB[(i+6)*lda];
- b8 = &BB[(i+7)*lda];
-
- for ( k = 0; k < lda; k+=8 ) {
- a1 = A[j*lda + k+0];
- a2 = A[j*lda + k+1];
- a3 = A[j*lda + k+2];
- a4 = A[j*lda + k+3];
- a5 = A[j*lda + k+4];
- a6 = A[j*lda + k+5];
- a7 = A[j*lda + k+6];
- a8 = A[j*lda + k+7];
-
- c1 += a1 * b1[k+0];
- c1 += a2 * b1[k+1];
- c1 += a3 * b1[k+2];
- c1 += a4 * b1[k+3];
- c1 += a5 * b1[k+4];
- c1 += a6 * b1[k+5];
- c1 += a7 * b1[k+6];
- c1 += a8 * b1[k+7];
-
- c2 += a1 * b2[k+0];
- c2 += a2 * b2[k+1];
- c2 += a3 * b2[k+2];
- c2 += a4 * b2[k+3];
- c2 += a5 * b2[k+4];
- c2 += a6 * b2[k+5];
- c2 += a7 * b2[k+6];
- c2 += a8 * b2[k+7];
-
- c3 += a1 * b3[k+0];
- c3 += a2 * b3[k+1];
- c3 += a3 * b3[k+2];
- c3 += a4 * b3[k+3];
- c3 += a5 * b3[k+4];
- c3 += a6 * b3[k+5];
- c3 += a7 * b3[k+6];
- c3 += a8 * b3[k+7];
-
- c4 += a1 * b4[k+0];
- c4 += a2 * b4[k+1];
- c4 += a3 * b4[k+2];
- c4 += a4 * b4[k+3];
- c4 += a5 * b4[k+4];
- c4 += a6 * b4[k+5];
- c4 += a7 * b4[k+6];
- c4 += a8 * b4[k+7];
-
- c5 += a1 * b5[k+0];
- c5 += a2 * b5[k+1];
- c5 += a3 * b5[k+2];
- c5 += a4 * b5[k+3];
- c5 += a5 * b5[k+4];
- c5 += a6 * b5[k+5];
- c5 += a7 * b5[k+6];
- c5 += a8 * b5[k+7];
-
- c6 += a1 * b6[k+0];
- c6 += a2 * b6[k+1];
- c6 += a3 * b6[k+2];
- c6 += a4 * b6[k+3];
- c6 += a5 * b6[k+4];
- c6 += a6 * b6[k+5];
- c6 += a7 * b6[k+6];
- c6 += a8 * b6[k+7];
-
- c7 += a1 * b7[k+0];
- c7 += a2 * b7[k+1];
- c7 += a3 * b7[k+2];
- c7 += a4 * b7[k+3];
- c7 += a5 * b7[k+4];
- c7 += a6 * b7[k+5];
- c7 += a7 * b7[k+6];
- c7 += a8 * b7[k+7];
-
- c8 += a1 * b8[k+0];
- c8 += a2 * b8[k+1];
- c8 += a3 * b8[k+2];
- c8 += a4 * b8[k+3];
- c8 += a5 * b8[k+4];
- c8 += a6 * b8[k+5];
- c8 += a7 * b8[k+6];
- c8 += a8 * b8[k+7];
- }
- C[i+0 + j*lda] += c1;
- C[i+1 + j*lda] += c2;
- C[i+2 + j*lda] += c3;
- C[i+3 + j*lda] += c4;
- C[i+4 + j*lda] += c5;
- C[i+5 + j*lda] += c6;
- C[i+6 + j*lda] += c7;
- C[i+7 + j*lda] += c8;
- }
- }
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-/*
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-*/
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
-
- size_t i;
-
- size_t sizepercore = n / ncores;
- size_t start = coreid * sizepercore;
- size_t end = (coreid + 1) * sizepercore;
- for (i = start; i < end; i++)
- {
- x[i] = x[i] + y[i];
- }
-
-
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+// Widest strip of C columns that matmul() accumulates on the stack at once.
+enum { MATMUL_STRIP = 32 };
+
+//--------------------------------------------------------------------------
+// multi-threaded, blocked version
+//
+// Computes C = A * B for square, row-major lda x lda matrices.
+//
+//   coreid - index of the calling core, expected in [0, ncores)
+//   ncores - number of cores sharing the work
+//   lda    - matrix dimension (rows == columns); any positive value
+//   A, B   - inputs, lda*lda elements each
+//   C      - output, lda*lda elements; the rows owned by this core are
+//            overwritten (not accumulated into)
+//
+// Core `coreid` owns rows [coreid*lda/ncores, (coreid+1)*lda/ncores), so the
+// cores write disjoint rows of C. Out-of-range arguments make the call a no-op.
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+   size_t n, row_begin, row_end, k_blocked;
+   size_t r, j0, width, j, k;
+   int paired;
+   const data_t *a0, *a1, *b0, *b1, *b2, *b3;
+   data_t x0, x1, x2, x3, y0, y1, y2, y3;
+   data_t acc0[MATMUL_STRIP];
+   data_t acc1[MATMUL_STRIP];
+
+   // Reject arguments that would divide by zero or index outside the matrices.
+   if (lda <= 0 || ncores <= 0 || coreid < 0 || coreid >= ncores)
+      return;
+
+   // All index arithmetic is done in size_t so lda*lda cannot overflow int.
+   n = (size_t)lda;
+   row_begin = (size_t)coreid * n / (size_t)ncores;
+   row_end = ((size_t)coreid + 1) * n / (size_t)ncores;
+   k_blocked = n - n % 4;   // largest multiple of 4 that is <= n
+
+   for (r = row_begin; r < row_end; r += 2) {
+      // Rows are handled in pairs so each element loaded from B feeds two
+      // accumulators. An odd-sized range leaves a final unpaired row: point
+      // the second row at the first and skip its store below.
+      paired = (r + 1 < row_end);
+      a0 = A + r * n;
+      a1 = paired ? a0 + n : a0;
+
+      // Walk the output row in strips so the accumulators have a fixed size
+      // regardless of lda.
+      for (j0 = 0; j0 < n; j0 += (size_t)MATMUL_STRIP) {
+         width = n - j0;
+         if (width > (size_t)MATMUL_STRIP)
+            width = (size_t)MATMUL_STRIP;
+
+         for (j = 0; j < width; j++) {
+            acc0[j] = 0;
+            acc1[j] = 0;
+         }
+
+         // Main part: consume four elements of the A rows (four rows of B)
+         // per step. k + 3 < n holds here, so b3[j] stays inside B.
+         for (k = 0; k < k_blocked; k += 4) {
+            b0 = B + k * n + j0;
+            b1 = b0 + n;
+            b2 = b1 + n;
+            b3 = b2 + n;
+
+            x0 = a0[k]; x1 = a0[k+1]; x2 = a0[k+2]; x3 = a0[k+3];
+            y0 = a1[k]; y1 = a1[k+1]; y2 = a1[k+2]; y3 = a1[k+3];
+
+            for (j = 0; j < width; j++) {
+               acc0[j] += x0*b0[j] + x1*b1[j] + x2*b2[j] + x3*b3[j];
+               acc1[j] += y0*b0[j] + y1*b1[j] + y2*b2[j] + y3*b3[j];
+            }
+         }
+
+         // Remainder when lda is not a multiple of 4.
+         for (k = k_blocked; k < n; k++) {
+            b0 = B + k * n + j0;
+            x0 = a0[k];
+            y0 = a1[k];
+
+            for (j = 0; j < width; j++) {
+               acc0[j] += x0*b0[j];
+               acc1[j] += y0*b0[j];
+            }
+         }
+
+         // Publish the finished strip; only rows owned by this core are written.
+         for (j = 0; j < width; j++) {
+            C[r * n + j0 + j] = acc0[j];
+            if (paired)
+               C[(r + 1) * n + j0 + j] = acc1[j];
+         }
+      }
+   }
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i,j,k,l;
- data_t element1, element2, element3, element4, element5, element6, element7, element8;
- int row, row2;
- int column1, column2, column3, column4, column5, column6, column7, column8;
- data_t temp[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- data_t temp2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- if (coreid == 0){
- for (i=0; i<32; i+=2){
- row = i*32;
- row2 = (i+1)*32;
- for (j=0; j<16; j+=4){
- element1 = A[row+j];
- element2 = A[row+j+1];
- element3 = A[row+j+2];
- element4 = A[row+j+3];
- column1 = j*32;
- column2 = (j+1)*32;
- column3 = (j+2)*32;
- column4 = (j+3)*32;
- element5 = A[row2+j];
- element6 = A[row2+j+1];
- element7 = A[row2+j+2];
- element8 = A[row2+j+3];
-
- for (k=0; k<32; k+=4){
- temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k];
- temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1];
- temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2];
- temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3];
- temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k];
- temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1];
- temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2];
- temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3];
- }
- if (j==12){
- for (l=0; l<32; l++){
- C[row+l]+=temp[l];
- C[row2+l]+=temp2[l];
- temp[l]=0;
- temp2[l]=0;
- }
- }
- }
- }
- }
- else if (coreid==1){
- for (i=0; i<32; i+=2){
- row = (31-i)*32;
- row2 = (31-i-1)*32;
- for (j=16; j<32; j+=4){
- element1 = A[row+j];
- element2 = A[row+j+1];
- element3 = A[row+j+2];
- element4 = A[row+j+3];
- element5 = A[row2+j];
- element6 = A[row2+j+1];
- element7 = A[row2+j+2];
- element8 = A[row2+j+3];
- column1 = j*32;
- column2 = (j+1)*32;
- column3 = (j+2)*32;
- column4 = (j+3)*32;
- for (k=0; k<32; k+=4){
- temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k];
- temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1];
- temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2];
- temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3];
- temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k];
- temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1];
- temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2];
- temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3];
- }
- if (j==28){
- for (l=0; l<32; l++){
- C[row+l]+=temp[l];
- C[row2+l]+=temp2[l];
- temp[l]=0;
- temp2[l]=0;
- }
- }
- }
- }
- }
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArray( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i,j,k,l;
- data_t element1, element2, element3, element4, element5, element6, element7, element8;
- int row, row2;
- int column1, column2, column3, column4, column5, column6, column7, column8;
- data_t temp[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- data_t temp2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- if (coreid == 0){
- for (i=0; i<32; i+=2){
- row = i*32;
- row2 = (i+1)*32;
- for (j=0; j<16; j+=4){
- element1 = A[row+j];
- element2 = A[row+j+1];
- element3 = A[row+j+2];
- element4 = A[row+j+3];
- column1 = j*32;
- column2 = (j+1)*32;
- column3 = (j+2)*32;
- column4 = (j+3)*32;
- element5 = A[row2+j];
- element6 = A[row2+j+1];
- element7 = A[row2+j+2];
- element8 = A[row2+j+3];
-
- for (k=0; k<32; k+=4){
- temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k];
- temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1];
- temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2];
- temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3];
- temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k];
- temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1];
- temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2];
- temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3];
- }
- if (j==12){
- for (l=0; l<32; l++){
- C[row+l]+=temp[l];
- C[row2+l]+=temp2[l];
- temp[l]=0;
- temp2[l]=0;
- }
- }
- }
- }
- }
- else if (coreid==1){
- for (i=0; i<32; i+=2){
- row = (31-i)*32;
- row2 = (31-i-1)*32;
- for (j=16; j<32; j+=4){
- element1 = A[row+j];
- element2 = A[row+j+1];
- element3 = A[row+j+2];
- element4 = A[row+j+3];
- element5 = A[row2+j];
- element6 = A[row2+j+1];
- element7 = A[row2+j+2];
- element8 = A[row2+j+3];
- column1 = j*32;
- column2 = (j+1)*32;
- column3 = (j+2)*32;
- column4 = (j+3)*32;
- for (k=0; k<32; k+=4){
- temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k];
- temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1];
- temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2];
- temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3];
- temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k];
- temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1];
- temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2];
- temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3];
- }
- if (j==28){
- for (l=0; l<32; l++){
- C[row+l]+=temp[l];
- C[row2+l]+=temp2[l];
- temp[l]=0;
- temp2[l]=0;
- }
- }
- }
- }
- }
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- // Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
-
-
- // verify
- verify(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier();
-
-
- // Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
-
-#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
-
- exit(0);
-}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-data_t mult(data_t x, data_t y)
-{ data_t result = 0;
- size_t i;
- for (i=0; i < x; i++) {
- result += y;
- }
- return result;
-}
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
- void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- size_t i, j, k, l;
- int row,row2, column, column2, column3, column4, column5, column6, column7, column8;
- data_t element, element2, element3, element4, element5, element6, element7, element8;
- data_t B1, B2, B3, B4;
- data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- int local_lda = lda;
-
- for (l=coreid*local_lda/ncores; l<local_lda*(1+coreid)/ncores; l+=2){
- row=l*32;
- row2=(l+1)*32;
- //element = A[row];
- //element5 = A[row2];
- for (i=0; i<local_lda; i+=4){
- element = A[row+i];
- element2 = A[row+i+1];
- element3 = A[row+i+2];
- element4 = A[row+i+3];
-
- element5 = A[row2+i];
- element6 = A[row2+i+1];
- element7 = A[row2+i+2];
- element8 = A[row2+i+3];
-
- column=i*local_lda;
- column2=(i+1)*local_lda;
- column3=(i+2)*local_lda;
- column4=(i+3)*local_lda;
-
- B1 = B[column];
- B2 = B[column2];
- B3 = B[column3];
- B4 = B[column4];
-
- for (j=0; j<lda; j+=4){
- temp_mat[j]+=element*B1+element2*B2+element3*B3+element4*B4;
- temp_mat[j+1]+=element*B[column+j+1]+element2*B[column2+j+1]+element3*B[column3+j+1]+element4*B[column4+j+1];
- temp_mat[j+2]+=element*B[column+j+2]+element2*B[column2+j+2]+element3*B[column3+j+2]+element4*B[column4+j+2];
- temp_mat[j+3]+=element*B[column+j+3]+element2*B[column2+j+3]+element3*B[column3+j+3]+element4*B[column4+j+3];
-
- temp_mat2[j]+=element5*B1+element6*B2+element7*B3+element8*B4;
- temp_mat2[j+1]+=element5*B[column+j+1]+element6*B[column2+j+1]+element7*B[column3+j+1]+element8*B[column4+j+1];
- temp_mat2[j+2]+=element5*B[column+j+2]+element6*B[column2+j+2]+element7*B[column3+j+2]+element8*B[column4+j+2];
- temp_mat2[j+3]+=element5*B[column+j+3]+element6*B[column2+j+3]+element7*B[column3+j+3]+element8*B[column4+j+3];
-
- B1 = B[column+j+4];
- B2 = B[column2+j+4];
- B3 = B[column3+j+4];
- B4 = B[column4+j+4];
-
- }
- //element = A[row+i+4];
- //element5 = A[row2+i+4];
- }
-
- for(k=0; k<local_lda; k++){
- C[row+k]=temp_mat[k];
- temp_mat[k]=0;
- C[row2+k]=temp_mat2[k];
- temp_mat2[k]=0;
-
- }
-
-
- }
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- size_t i, j, k, l;
- int row,row2, column, column2, column3, column4, column5, column6, column7, column8;
- data_t element, element2, element3, element4, element5, element6, element7, element8;
- data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){
- for (l=coreid*32/ncores; l<32*(1+coreid)/ncores; l+=2){
- row=l*lda;
- row2=(l+1)*lda;
- for (i=0; i<lda; i+=4){
- element = A[row+i];
- element2 = A[row+i+1];
- element3 = A[row+i+2];
- element4 = A[row+i+3];
-
- element5 = A[row2+i];
- element6 = A[row2+i+1];
- element7 = A[row2+i+2];
- element8 = A[row2+i+3];
-
- column=i*lda;
- column2=(i+1)*lda;
- column3=(i+2)*lda;
- column4=(i+3)*lda;
-
- for (j=0; j<lda; j+=4){
- temp_mat[j]+=element*B[column+j]+element2*B[column2+j]+element3*B[column3+j]+element4*B[column4+j];
- temp_mat[j+1]+=element*B[column+j+1]+element2*B[column2+j+1]+element3*B[column3+j+1]+element4*B[column4+j+1];
- temp_mat[j+2]+=element*B[column+j+2]+element2*B[column2+j+2]+element3*B[column3+j+2]+element4*B[column4+j+2];
- temp_mat[j+3]+=element*B[column+j+3]+element2*B[column2+j+3]+element3*B[column3+j+3]+element4*B[column4+j+3];
-
- temp_mat2[j]+=element5*B[column+j]+element6*B[column2+j]+element7*B[column3+j]+element8*B[column4+j];
- temp_mat2[j+1]+=element5*B[column+j+1]+element6*B[column2+j+1]+element7*B[column3+j+1]+element8*B[column4+j+1];
- temp_mat2[j+2]+=element5*B[column+j+2]+element6*B[column2+j+2]+element7*B[column3+j+2]+element8*B[column4+j+2];
- temp_mat2[j+3]+=element5*B[column+j+3]+element6*B[column2+j+3]+element7*B[column3+j+3]+element8*B[column4+j+3];
- }
-
- }
-
- for(k=0; k<32; k++){
- C[row+k]=temp_mat[k];
- temp_mat[k]=0;
- C[row2+k]=temp_mat2[k];
- temp_mat2[k]=0;
-
-
- }
- }
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArray( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- size_t i, j, k, l;
- int row,row2, column, column2, column3, column4, column5, column6, column7, column8;
- data_t element, element2, element3, element4, element5, element6, element7, element8;
- data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){
- for (l=coreid*32/ncores; l<32*(1+coreid)/ncores; l+=4){
- row=l*lda;
- row2=(l+1)*lda;
- for (i=0; i<lda; i+=4){
- element = A[row+i];
- element2 = A[row+i+1];
- element3 = A[row+i+2];
- element4 = A[row+i+3];
-
- element5 = A[row2+i];
- element6 = A[row2+i+1];
- element7 = A[row2+i+2];
- element8 = A[row2+i+3];
-
- column=i*lda;
- column2=(i+1)*lda;
- column3=(i+2)*lda;
- column4=(i+3)*lda;
-
- for (j=0; j<lda; j+=4){
- temp_mat[j]+=element*B[column+j]+element2*B[column2+j]+element3*B[column3+j]+element4*B[column4+j];
- temp_mat[j+1]+=element*B[column+j+1]+element2*B[column2+j+1]+element3*B[column3+j+1]+element4*B[column4+j+1];
- temp_mat[j+2]+=element*B[column+j+2]+element2*B[column2+j+2]+element3*B[column3+j+2]+element4*B[column4+j+2];
- temp_mat[j+3]+=element*B[column+j+3]+element2*B[column2+j+3]+element3*B[column3+j+3]+element4*B[column4+j+3];
-
- temp_mat2[j]+=element5*B[column+j]+element6*B[column2+j]+element7*B[column3+j]+element8*B[column4+j];
- temp_mat2[j+1]+=element5*B[column+j+1]+element6*B[column2+j+1]+element7*B[column3+j+1]+element8*B[column4+j+1];
- temp_mat2[j+2]+=element5*B[column+j+2]+element6*B[column2+j+2]+element7*B[column3+j+2]+element8*B[column4+j+2];
- temp_mat2[j+3]+=element5*B[column+j+3]+element6*B[column2+j+3]+element7*B[column3+j+3]+element8*B[column4+j+3];
- }
-
- }
-
- for(k=0; k<32; k++){
- C[row+k]=temp_mat[k];
- temp_mat[k]=0;
- C[row2+k]=temp_mat2[k];
- temp_mat2[k]=0;
-
-
- }
- }
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- // Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
-
-
- // verify
- verify(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier();
-
-
- // Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
-
-#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
-
- exit(0);
-}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-data_t mult(data_t x, data_t y)
-{ data_t result = 0;
- size_t i;
- for (i=0; i < x; i++) {
- result += y;
- }
- return result;
-}
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- size_t i, j, k, l;
- int row,row2, column, column2, column3, column4, column5, column6, column7, column8;
- data_t element, element2, element3, element4, element5, element6, element7, element8;
- data_t B1, B2, B3, B4;
- data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- int local_lda = lda;
- //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){
- for (l=coreid*local_lda/ncores; l<local_lda*(1+coreid)/ncores; l+=2){
- row=l*32;
- row2=(l+1)*32;
- //element = A[row];
- //element5 = A[row2];
- for (i=0; i<local_lda; i+=4){
- element = A[row+i];
- element2 = A[row+i+1];
- element3 = A[row+i+2];
- element4 = A[row+i+3];
-
- element5 = A[row2+i];
- element6 = A[row2+i+1];
- element7 = A[row2+i+2];
- element8 = A[row2+i+3];
-
- column=i*local_lda;
- column2=(i+1)*local_lda;
- column3=(i+2)*local_lda;
- column4=(i+3)*local_lda;
-
- B1 = B[column];
- B2 = B[column2];
- B3 = B[column3];
- B4 = B[column4];
-
- for (j=0; j<lda; j+=4){
- temp_mat[j]+=element*B1+element2*B2+element3*B3+element4*B4;
- temp_mat[j+1]+=element*B[column+j+1]+element2*B[column2+j+1]+element3*B[column3+j+1]+element4*B[column4+j+1];
- temp_mat[j+2]+=element*B[column+j+2]+element2*B[column2+j+2]+element3*B[column3+j+2]+element4*B[column4+j+2];
- temp_mat[j+3]+=element*B[column+j+3]+element2*B[column2+j+3]+element3*B[column3+j+3]+element4*B[column4+j+3];
-
- temp_mat2[j]+=element5*B1+element6*B2+element7*B3+element8*B4;
- temp_mat2[j+1]+=element5*B[column+j+1]+element6*B[column2+j+1]+element7*B[column3+j+1]+element8*B[column4+j+1];
- temp_mat2[j+2]+=element5*B[column+j+2]+element6*B[column2+j+2]+element7*B[column3+j+2]+element8*B[column4+j+2];
- temp_mat2[j+3]+=element5*B[column+j+3]+element6*B[column2+j+3]+element7*B[column3+j+3]+element8*B[column4+j+3];
-
- B1 = B[column+j+4];
- B2 = B[column2+j+4];
- B3 = B[column3+j+4];
- B4 = B[column4+j+4];
-
- }
- //element = A[row+i+4];
- //element5 = A[row2+i+4];
- }
-
- for(k=0; k<local_lda; k++){
- C[row+k]=temp_mat[k];
- temp_mat[k]=0;
- C[row2+k]=temp_mat2[k];
- temp_mat2[k]=0;
-
- }
-
-
- }
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- size_t i;
- size_t i2;
- size_t j;
- size_t j2;
- size_t k;
- size_t k2;
- size_t max_dim = lda*lda;
- size_t block_size = lda/2;
- data_t temp_mat[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- if (coreid == 0) {
- //making a 16x16 block
- //First block: Top 16x16 block left of A and top left of B = top left of C
- //Second block: top right 16x16 right block of A and top right of B = top right of C
- for (j2= 0; j2 < 2; j2++) {
- for (i2 = 0; i2 < 2; i2++) {
- //for (j2= 0; j2 < 2; j2++) {
- //K represents which row of A and C
- for (k = 0; k < block_size; k++) {
- int rowIndex = k*32;
- for (i = i2*block_size; i < i2*block_size+block_size; i++) {
- int elementA = A[rowIndex+i];
- int columnIndex = i%32*32;
- for (j = 0; j < block_size; j++) {
- temp_mat[j] += elementA*B[columnIndex+j+j2*block_size];
- }
- }
- //Put temp_mat into actual result Matrix
- for (k2 = 0; k2 < block_size; k2++) {
- C[rowIndex+k2+j2*block_size] += temp_mat[k2];
- temp_mat[k2] = 0;
- }
- }
- }
- }
- } else {
- for (j2= 0; j2 < 2; j2++) {
- for (i2 = 0; i2 < 2; i2++) {
- //for (j2= 0; j2 < 2; j2++) {
- //K represents which row of A and C
- for (k = block_size; k < lda; k++) {
- int rowIndex = k*32;
- for (i = i2*block_size; i < i2*block_size+block_size; i++) {
- int elementA = A[rowIndex+i];
- int columnIndex = i%32*32;
- for (j = 0; j < block_size; j++) {
- temp_mat[j] += elementA*B[columnIndex+j+j2*block_size];
- }
- }
- //Put temp_mat into actual result Matrix
- for (k2 = 0; k2 < block_size; k2++) {
- C[rowIndex+k2+j2*block_size] += temp_mat[k2];
- temp_mat[k2] = 0;
- }
- }
- }
- }
- }
-
-
- //size_t half_lda = lda/2;
- // k = which pair of row we're on
-
-
-
-
-
-
-/*
- for (k = coreid*lda/ncores; k < (lda/ncores + coreid*lda/ncores); k += 2) {
- //printf("%d", k);
- for (i = 0; i < lda ; i++) {
- int elementA = A[32*k+i];
- int elementA2 = A[i + 32*(k+1)];
- int column = i%32*32;
- for (j = 0; j < lda; j++) {
- C[32*k + j] += elementA*B[column+j];
- C[32*(k+1) + j] += elementA2*B[column+j];
- }
- }
-
- }
-*/
-
-/*
- data_t element=A[i];
- data_t element2 = A[i+1];
- data_t element3 = A[i+2];
- data_t element4 = A[i+3];
- data_t element5 = A[i+4];
- data_t element6 = A[i+5];
- data_t element7 = A[i+6];
- data_t element8 = A[i+7];
- int row= (int)(i/32)*32;
- int row2 = (i+1)/32*32;
- int row3 = (i+2)/32*32;
- int row4 = (i+3)/32*32;
- int row5 = (i+4)/32*32;
- int row6 = (i+5)/32*32;
- int row7 = (i+6)/32*32;
- int row8 = (i+7)/32*32;
- int column = i%32*32;
- int column2 = (i+1)%32*32;
- int column3 = (i+2)%32*32;
- int column4 = (i+3)%32*32;
- int column5 = (i+4)%32*32;
- int column6 = (i+5)%32*32;
- int column7 = (i+6)%32*32;
-
- */
-
- //int column8 = (i+7)%32*32;
-
- /*
- for (j=0; j < lda; j++) {
- sum = B[
- C[row+j]+=element*B[column+j];
- C[row2+j]+=element2*B[column2+j];
- C[row3+j]+=element3*B[column3+j];
- C[row4+j]+=element4*B[column4+j];
- C[row5+j]+=element5*B[column5+j];
- C[row6+j]+=element6*B[column6+j];
- C[row7+j]+=element7*B[column7+j];
- C[row8+j]+=element8*B[column8+j];
- C[row+j]+=element*B[column+j]+element2*B[column2+j]+element3*B[column3+j]+element4*B[column4+j]+element5*B[column5+j]+element6*B[column6+j]+element7*B[column7+j]+element8*B[column8+j];
- }
- }
- */
-
-
-
-
-
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- size_t i;
- size_t i2;
- size_t j;
- size_t j2;
- size_t k;
- size_t k2;
- size_t max_dim = lda*lda;
- size_t block_size = lda/2;
- int result = 0;
- data_t temp_mat1[32] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- if (coreid == 0) {
- for (k = 0; k < lda/2; k++) {
- int columnIndex = 32*k;
-
- //temp_mat1 will store the kth column of B
- for (i = 0; i < lda; i++) {
- temp_mat1[i] = B[32*i + k];
- }
-
- for (j =0; j < lda; j++) {
- int rowIndex = 32*j;
- //iterate through each element of A in row J and accumulate result
- for (i2 = 0; i2 <lda; i2 += 4) {
- int elementA = A[rowIndex+i2];
- int elementA2 = A[rowIndex+i2+1];
- int elementA3 = A[rowIndex+i2+2];
- int elementA4 = A[rowIndex+i2+3];
- result += elementA*temp_mat1[i2] + elementA2*temp_mat1[i2+1] + elementA3*temp_mat1[i2+2] + elementA4*temp_mat1[i2+3] ;
- }
- C[k+rowIndex] = result;
- result = 0;
- }
-
- }
- } else {
- for (k = lda/2; k < lda; k++) {
- int columnIndex = 32*k;
-
- //temp_mat1 will store the kth column of B
- for (i = 0; i < lda; i++) {
- temp_mat1[i] = B[32*i + k];
- }
-
- for (j =0; j < lda; j++) {
- int rowIndex = 32*j;
- //iterate through each element of A in row J and accumulate result
- for (i2 = 0; i2 <lda; i2 += 4) {
- int elementA = A[rowIndex+i2];
- int elementA2 = A[rowIndex+i2+1];
- int elementA3 = A[rowIndex+i2+2];
- int elementA4 = A[rowIndex+i2+3];
- result += elementA*temp_mat1[i2] + elementA2*temp_mat1[i2+1] + elementA3*temp_mat1[i2+2] + elementA4*temp_mat1[i2+3] ;
- }
- C[k+rowIndex] = result;
- result = 0;
- }
-
- }
-
- }
-
-
-
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- size_t i, j, k, l;
- int row, row2, row3, row4, column, column2;
- data_t element, element2, element3, element4, element5, element6, element7, element8;
- data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- data_t temp_mat3[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- data_t temp_mat4[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){
- for (l=coreid*32/ncores; l<32*(1+coreid)/ncores; l+=4){
- row=l*lda;
- row2=(l+1)*lda;
- row3=(l+2)*lda;
- row4=(l+3)*lda;
- for (i=0; i<lda; i+=2){
- element = A[row+i];
- element2 = A[row+i+1];
-
- element3 = A[row2+i];
- element4 = A[row2+i+1];
-
- element5 = A[row3+i];
- element6 = A[row3+i+1];
-
- element7 = A[row4+i];
- element8 = A[row4+i+1];
-
- column=i*lda;
- column2=(i+1)*lda;
-
- for (j=0; j<lda; j+=2){
- temp_mat[j]+=element*B[column+j]+element2*B[column2+j];
- temp_mat2[j]+=element3*B[column+j]+element4*B[column2+j];
- temp_mat3[j]+=element5*B[column+j]+element6*B[column2+j];
- temp_mat4[j]+=element7*B[column+j]+element8*B[column2+j];
-
- temp_mat[j+1]+=element*B[column+j+1]+element2*B[column2+j+1];
- temp_mat2[j+1]+=element3*B[column+j+1]+element4*B[column2+j+1];
- temp_mat3[j+1]+=element5*B[column+j+1]+element6*B[column2+j+1];
- temp_mat4[j+1]+=element7*B[column+j+1]+element8*B[column2+j+1];
-
-
-
- }
-
- }
-
- for(k=0; k<32; k+= 4){
- C[row+k]=temp_mat[k];
- temp_mat[k]=0;
- C[row2+k]=temp_mat2[k];
- temp_mat2[k]=0;
- C[row3+k]=temp_mat3[k];
- temp_mat3[k]=0;
- C[row4+k]=temp_mat4[k];
- temp_mat4[k]=0;
-
- C[row+k+1]=temp_mat[k+1];
- temp_mat[k+1]=0;
- C[row2+k+1]=temp_mat2[k+1];
- temp_mat2[k+1]=0;
- C[row3+k+1]=temp_mat3[k+1];
- temp_mat3[k+1]=0;
- C[row4+k+1]=temp_mat4[k+1];
- temp_mat4[k+1]=0;
-
- C[row+k+2]=temp_mat[k+2];
- temp_mat[k+2]=0;
- C[row2+k+2]=temp_mat2[k+2];
- temp_mat2[k+2]=0;
- C[row3+k+2]=temp_mat3[k+2];
- temp_mat3[k+2]=0;
- C[row4+k+2]=temp_mat4[k+2];
- temp_mat4[k+2]=0;
-
- C[row+k+3]=temp_mat[k+3];
- temp_mat[k+3]=0;
- C[row2+k+3]=temp_mat2[k+3];
- temp_mat2[k+3]=0;
- C[row3+k+3]=temp_mat3[k+3];
- temp_mat3[k+3]=0;
- C[row4+k+3]=temp_mat4[k+3];
- temp_mat4[k+3]=0;
-
-
-
- }
- }
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- size_t i, j, k, l;
- int row,row2, column, column2, column3, column4, column5, column6, column7, column8;
- data_t element, element2, element3, element4, element5, element6, element7, element8;
- data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){
- for (l=coreid*32/ncores; l<32*(1+coreid)/ncores; l+=2){
- row=l*lda;
- row2=(l+1)*lda;
- if (coreid == 0) {
- for (i=0; i<lda; i+=4){
- element = A[row+i];
- element2 = A[row+i+1];
- element3 = A[row+i+2];
- element4 = A[row+i+3];
-
- element5 = A[row2+i];
- element6 = A[row2+i+1];
- element7 = A[row2+i+2];
- element8 = A[row2+i+3];
-
- column=i*lda;
- column2=(i+1)*lda;
- column3=(i+2)*lda;
- column4=(i+3)*lda;
-
- for (j=0; j<lda; j+=4){
- temp_mat[j]+=element*B[column+j]+element2*B[column2+j]+element3*B[column3+j]+element4*B[column4+j];
- temp_mat[j+1]+=element*B[column+j+1]+element2*B[column2+j+1]+element3*B[column3+j+1]+element4*B[column4+j+1];
- temp_mat[j+2]+=element*B[column+j+2]+element2*B[column2+j+2]+element3*B[column3+j+2]+element4*B[column4+j+2];
- temp_mat[j+3]+=element*B[column+j+3]+element2*B[column2+j+3]+element3*B[column3+j+3]+element4*B[column4+j+3];
-
- temp_mat2[j]+=element5*B[column+j]+element6*B[column2+j]+element7*B[column3+j]+element8*B[column4+j];
- temp_mat2[j+1]+=element5*B[column+j+1]+element6*B[column2+j+1]+element7*B[column3+j+1]+element8*B[column4+j+1];
- temp_mat2[j+2]+=element5*B[column+j+2]+element6*B[column2+j+2]+element7*B[column3+j+2]+element8*B[column4+j+2];
- temp_mat2[j+3]+=element5*B[column+j+3]+element6*B[column2+j+3]+element7*B[column3+j+3]+element8*B[column4+j+3];
- }
-
- }
-
- for(k=0; k<32; k++){
- C[row+k]=temp_mat[k];
- temp_mat[k]=0;
- C[row2+k]=temp_mat2[k];
- temp_mat2[k]=0;
-
-
- }
- } else {
-
- for (i=0; i<lda; i += 4){
- element = A[row-i+lda-1];
- element2 = A[row-i-1+lda-1];
- element3 = A[row-i-2+lda-1];
- element4 = A[row-i-3+lda-1];
-
- element5 = A[row2-i+lda-1];
- element6 = A[row2-i-1+lda-1];
- element7 = A[row2-i-2+lda-1];
- element8 = A[row2-i-3+lda-1];
-
- column=(-i+lda-1)*lda;
- column2=(-i-1+lda-1)*lda;
- column3=(-i-2+lda-1)*lda;
- column4=(-i-3+lda-1)*lda;
-
- for (j=0; j<lda; j+=4){
- temp_mat[j]+=element*B[column+j]+element2*B[column2+j]+element3*B[column3+j]+element4*B[column4+j];
- temp_mat[j+1]+=element*B[column+j+1]+element2*B[column2+j+1]+element3*B[column3+j+1]+element4*B[column4+j+1];
- temp_mat[j+2]+=element*B[column+j+2]+element2*B[column2+j+2]+element3*B[column3+j+2]+element4*B[column4+j+2];
- temp_mat[j+3]+=element*B[column+j+3]+element2*B[column2+j+3]+element3*B[column3+j+3]+element4*B[column4+j+3];
-
- temp_mat2[j]+=element5*B[column+j]+element6*B[column2+j]+element7*B[column3+j]+element8*B[column4+j];
- temp_mat2[j+1]+=element5*B[column+j+1]+element6*B[column2+j+1]+element7*B[column3+j+1]+element8*B[column4+j+1];
- temp_mat2[j+2]+=element5*B[column+j+2]+element6*B[column2+j+2]+element7*B[column3+j+2]+element8*B[column4+j+2];
- temp_mat2[j+3]+=element5*B[column+j+3]+element6*B[column2+j+3]+element7*B[column3+j+3]+element8*B[column4+j+3];
- }
-
- }
-
- for(k=0; k<32; k++){
- C[row+k]=temp_mat[k];
- temp_mat[k]=0;
- C[row2+k]=temp_mat2[k];
- temp_mat2[k]=0;
-
-
- }
- }
- }
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
- /*
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-
- */
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArray( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- size_t i, j, k, l;
- int row,row2, column, column2, column3, column4, column5, column6, column7, column8;
- data_t element, element2, element3, element4, element5, element6, element7, element8;
- data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){
- for (l=coreid*32/ncores; l<32*(1+coreid)/ncores; l+=2){
- row=l*lda;
- row2=(l+1)*lda;
- for (i=0; i<lda; i+=4){
- element = A[row+i];
- element2 = A[row+i+1];
- element3 = A[row+i+2];
- element4 = A[row+i+3];
-
- element5 = A[row2+i];
- element6 = A[row2+i+1];
- element7 = A[row2+i+2];
- element8 = A[row2+i+3];
-
- column=i*lda;
- column2=(i+1)*lda;
- column3=(i+2)*lda;
- column4=(i+3)*lda;
-
- for (j=0; j<lda; j+=4){
- temp_mat[j]+=element*B[column+j]+element2*B[column2+j]+element3*B[column3+j]+element4*B[column4+j];
- temp_mat[j+1]+=element*B[column+j+1]+element2*B[column2+j+1]+element3*B[column3+j+1]+element4*B[column4+j+1];
- temp_mat[j+2]+=element*B[column+j+2]+element2*B[column2+j+2]+element3*B[column3+j+2]+element4*B[column4+j+2];
- temp_mat[j+3]+=element*B[column+j+3]+element2*B[column2+j+3]+element3*B[column3+j+3]+element4*B[column4+j+3];
-
- temp_mat2[j]+=element5*B[column+j]+element6*B[column2+j]+element7*B[column3+j]+element8*B[column4+j];
- temp_mat2[j+1]+=element5*B[column+j+1]+element6*B[column2+j+1]+element7*B[column3+j+1]+element8*B[column4+j+1];
- temp_mat2[j+2]+=element5*B[column+j+2]+element6*B[column2+j+2]+element7*B[column3+j+2]+element8*B[column4+j+2];
- temp_mat2[j+3]+=element5*B[column+j+3]+element6*B[column2+j+3]+element7*B[column3+j+3]+element8*B[column4+j+3];
- }
-
- }
-
- for(k=0; k<32; k++){
- C[row+k]=temp_mat[k];
- temp_mat[k]=0;
- C[row2+k]=temp_mat2[k];
- temp_mat2[k]=0;
-
-
- }
- }
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- // Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
-
-
- // verify
- verify(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier();
-
-
- // Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
-
-#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
-
- exit(0);
-}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-data_t mult(data_t x, data_t y)
-{ data_t result = 0;
- size_t i;
- for (i=0; i < x; i++) {
- result += y;
- }
- return result;
-}
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- size_t i, j, k, l;
- int row,row2, row3, row4, column, column2, column3, column4, column5, column6, column7, column8;
- data_t element, element2, element3, element4, element5, element6, element7, element8;
- data_t element9, element10, element11, element12, element13, element14, element15, element16;
- data_t elementB1,elementB2,elementB3,elementB4;
- data_t temp_mat[128]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- //data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){
- for (l=coreid*32/ncores; l<32*(1+coreid)/ncores; l+=4){
- row=l*lda;
- row2=(l+1)*lda;
- row3=(l+2)*lda;
- row4=(l+3)*lda;
- for (i=0; i<lda; i+=4){
- element = A[row+i];
- element2 = A[row+i+1];
- element3 = A[row+i+2];
- element4 = A[row+i+3];
-
- element5 = A[row2+i];
- element6 = A[row2+i+1];
- element7 = A[row2+i+2];
- element8 = A[row2+i+3];
-
- element9 = A[row3+i];
- element10 = A[row3+i+1];
- element11 = A[row3+i+2];
- element12 = A[row3+i+3];
-
- element13 = A[row4+i];
- element14 = A[row4+i+1];
- element15 = A[row4+i+2];
- element16 = A[row4+i+3];
-
- column=i*lda;
- column2=(i+1)*lda;
- column3=(i+2)*lda;
- column4=(i+3)*lda;
-
-
- for (j=0; j<lda; j+=4){
-
- temp_mat[j]+=element*B[column+j]+element2*B[column2+j]+element3*B[column3+j]+element4*B[column4+j];
- temp_mat[j+1]+=element*B[column+j+1]+element2*B[column2+j+1]+element3*B[column3+j+1]+element4*B[column4+j+1];
- temp_mat[j+2]+=element*B[column+j+2]+element2*B[column2+j+2]+element3*B[column3+j+2]+element4*B[column4+j+2];
- temp_mat[j+3]+=element*B[column+j+3]+element2*B[column2+j+3]+element3*B[column3+j+3]+element4*B[column4+j+3];
-
- temp_mat[j+lda]+=element5*B[column+j]+element6*B[column2+j]+element7*B[column3+j]+element8*B[column4+j];
- temp_mat[j+1+lda]+=element5*B[column+j+1]+element6*B[column2+j+1]+element7*B[column3+j+1]+element8*B[column4+j+1];
- temp_mat[j+2+lda]+=element5*B[column+j+2]+element6*B[column2+j+2]+element7*B[column3+j+2]+element8*B[column4+j+2];
- temp_mat[j+3+lda]+=element5*B[column+j+3]+element6*B[column2+j+3]+element7*B[column3+j+3]+element8*B[column4+j+3];
-
- temp_mat[j+2*lda]+=element9*B[column+j]+element10*B[column2+j]+element11*B[column3+j]+element12*B[column4+j];
- temp_mat[j+1+2*lda]+=element9*B[column+j+1]+element10*B[column2+j+1]+element11*B[column3+j+1]+element12*B[column4+j+1];
- temp_mat[j+2+2*lda]+=element9*B[column+j+2]+element10*B[column2+j+2]+element11*B[column3+j+2]+element12*B[column4+j+2];
- temp_mat[j+3+2*lda]+=element9*B[column+j+3]+element10*B[column2+j+3]+element11*B[column3+j+3]+element12*B[column4+j+3];
-
- temp_mat[j+3*lda]+=element13*B[column+j]+element14*B[column2+j]+element15*B[column3+j]+element16*B[column4+j];
- temp_mat[j+1+3*lda]+=element13*B[column+j+1]+element14*B[column2+j+1]+element15*B[column3+j+1]+element16*B[column4+j+1];
- temp_mat[j+2+3*lda]+=element13*B[column+j+2]+element14*B[column2+j+2]+element15*B[column3+j+2]+element16*B[column4+j+2];
- temp_mat[j+3+3*lda]+=element13*B[column+j+3]+element14*B[column2+j+3]+element15*B[column3+j+3]+element16*B[column4+j+3];
-
-
- }
-
- }
-
- for(k=0; k<32; k++){
- C[row+k]=temp_mat[k];
- temp_mat[k]=0;
- C[row2+k]=temp_mat[k+lda];
- temp_mat[k+lda]=0;
- C[row3+k]=temp_mat[k+2*lda];
- temp_mat[k+2*lda]=0;
- C[row4+k]=temp_mat[k+3*lda];
- temp_mat[k+3*lda]=0;
-
-
- }
-
-
- }
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArray( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-data_t mult(data_t x, data_t y)
-{ data_t result = 0;
- size_t i;
- for (i=0; i < x; i++) {
- result += y;
- }
- return result;
-}
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
- void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- size_t i, j, k, l;
- int row,row2, column, column2, column3, column4, column5, column6, column7, column8;
- data_t element, element2, element3, element4, element5, element6, element7, element8;
- data_t B1, B2, B3, B4;
- data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- int local_lda = lda;
- //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){
- for (l=coreid*local_lda/ncores; l<local_lda*(1+coreid)/ncores; l+=2){
- row=l*32;
- row2=(l+1)*32;
- //element = A[row];
- //element5 = A[row2];
- for (i=0; i<local_lda; i+=4){
- element = A[row+i];
- element2 = A[row+i+1];
- element3 = A[row+i+2];
- element4 = A[row+i+3];
-
- element5 = A[row2+i];
- element6 = A[row2+i+1];
- element7 = A[row2+i+2];
- element8 = A[row2+i+3];
-
- column=i*local_lda;
- column2=(i+1)*local_lda;
- column3=(i+2)*local_lda;
- column4=(i+3)*local_lda;
-
- B1 = B[column];
- B2 = B[column2];
- B3 = B[column3];
- B4 = B[column4];
-
- for (j=0; j<lda; j+=4){
- temp_mat[j]+=element*B1+element2*B2+element3*B3+element4*B4;
- temp_mat[j+1]+=element*B[column+j+1]+element2*B[column2+j+1]+element3*B[column3+j+1]+element4*B[column4+j+1];
- temp_mat[j+2]+=element*B[column+j+2]+element2*B[column2+j+2]+element3*B[column3+j+2]+element4*B[column4+j+2];
- temp_mat[j+3]+=element*B[column+j+3]+element2*B[column2+j+3]+element3*B[column3+j+3]+element4*B[column4+j+3];
-
- temp_mat2[j]+=element5*B1+element6*B2+element7*B3+element8*B4;
- temp_mat2[j+1]+=element5*B[column+j+1]+element6*B[column2+j+1]+element7*B[column3+j+1]+element8*B[column4+j+1];
- temp_mat2[j+2]+=element5*B[column+j+2]+element6*B[column2+j+2]+element7*B[column3+j+2]+element8*B[column4+j+2];
- temp_mat2[j+3]+=element5*B[column+j+3]+element6*B[column2+j+3]+element7*B[column3+j+3]+element8*B[column4+j+3];
-
- B1 = B[column+j+4];
- B2 = B[column2+j+4];
- B3 = B[column3+j+4];
- B4 = B[column4+j+4];
-
- }
- //element = A[row+i+4];
- //element5 = A[row2+i+4];
- }
-
- for(k=0; k<local_lda; k++){
- C[row+k]=temp_mat[k];
- temp_mat[k]=0;
- C[row2+k]=temp_mat2[k];
- temp_mat2[k]=0;
-
- }
-
-
- }
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- // Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
-
-
- // verify
- verify(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier();
-
-
- // Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
-
-#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
-
- exit(0);
-}
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student: Felix Li $ Ronald Lee
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i,j,k,l;
- data_t element1, element2, element3, element4, element5, element6, element7, element8;
- int row, row2;
- int column1, column2, column3, column4, column5, column6, column7, column8;
- data_t temp[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- data_t temp2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- if (coreid == 0){
- for (i=0; i<32; i+=2){
- row = i*32;
- row2 = (i+1)*32;
- for (j=0; j<16; j+=4){
- element1 = A[row+j];
- element2 = A[row+j+1];
- element3 = A[row+j+2];
- element4 = A[row+j+3];
- column1 = j*32;
- column2 = (j+1)*32;
- column3 = (j+2)*32;
- column4 = (j+3)*32;
- element5 = A[row2+j];
- element6 = A[row2+j+1];
- element7 = A[row2+j+2];
- element8 = A[row2+j+3];
-
- for (k=0; k<32; k+=4){
- temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k];
- temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1];
- temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2];
- temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3];
- temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k];
- temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1];
- temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2];
- temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3];
- }
-
-
- }
- for (l=0; l<32; l++){
- C[row+l]+=temp[l];
- C[row2+l]+=temp2[l];
- temp[l]=0;
- temp2[l]=0;
- }
-
- }
- }
- else {
- for (i=0; i<32; i+=2){
- row = (31-i)*32;
- row2 = (31-i-1)*32;
- for (j=16; j<32; j+=4){
- element1 = A[row+j];
- element2 = A[row+j+1];
- element3 = A[row+j+2];
- element4 = A[row+j+3];
- element5 = A[row2+j];
- element6 = A[row2+j+1];
- element7 = A[row2+j+2];
- element8 = A[row2+j+3];
- column1 = j*32;
- column2 = (j+1)*32;
- column3 = (j+2)*32;
- column4 = (j+3)*32;
- for (k=0; k<32; k+=4){
- temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k];
- temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1];
- temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2];
- temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3];
- temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k];
- temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1];
- temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2];
- temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3];
- }
-
-
-
- }
- for (l=0; l<32; l++){
- C[row+l]+=temp[l];
- C[row2+l]+=temp2[l];
- temp[l]=0;
- temp2[l]=0;
- }
- }
- }
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student: Felix Li $ Ronald Lee
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArray( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i,j,k,l;
- data_t element1, element2, element3, element4, element5, element6, element7, element8;
- int row, row2;
- int column1, column2, column3, column4, column5, column6, column7, column8;
- data_t temp[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- data_t temp2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- if (coreid == 0){
- for (i=0; i<32; i+=2){
- row = i*32;
- row2 = (i+1)*32;
- for (j=0; j<16; j+=4){
- element1 = A[row+j];
- element2 = A[row+j+1];
- element3 = A[row+j+2];
- element4 = A[row+j+3];
- column1 = j*32;
- column2 = (j+1)*32;
- column3 = (j+2)*32;
- column4 = (j+3)*32;
- element5 = A[row2+j];
- element6 = A[row2+j+1];
- element7 = A[row2+j+2];
- element8 = A[row2+j+3];
-
- for (k=0; k<32; k+=4){
- temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k];
- temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1];
- temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2];
- temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3];
- temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k];
- temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1];
- temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2];
- temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3];
- }
- if (j==12){
- for (l=0; l<32; l++){
- C[row+l]+=temp[l];
- C[row2+l]+=temp2[l];
- temp[l]=0;
- temp2[l]=0;
- }
- }
- }
- }
- }
- else if (coreid==1){
- for (i=0; i<32; i+=2){
- row = (31-i)*32;
- row2 = (31-i-1)*32;
- for (j=16; j<32; j+=4){
- element1 = A[row+j];
- element2 = A[row+j+1];
- element3 = A[row+j+2];
- element4 = A[row+j+3];
- element5 = A[row2+j];
- element6 = A[row2+j+1];
- element7 = A[row2+j+2];
- element8 = A[row2+j+3];
- column1 = j*32;
- column2 = (j+1)*32;
- column3 = (j+2)*32;
- column4 = (j+3)*32;
- for (k=0; k<32; k+=4){
- temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k];
- temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1];
- temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2];
- temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3];
- temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k];
- temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1];
- temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2];
- temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3];
- }
- if (j==28){
- for (l=0; l<32; l++){
- C[row+l]+=temp[l];
- C[row2+l]+=temp2[l];
- temp[l]=0;
- temp2[l]=0;
- }
- }
- }
- }
- }
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- // Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
-
-
- // verify
- verify(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier();
-
-
- // Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
-
-#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
-
- exit(0);
-}
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
-
- size_t i;
-
- if (coreid == 0)
- {
- for (i = 0; i < n/2; i++) {
- x[i] = x[i] + y[i];
- }
- } else {
- for (i = n/2; i < n; i++) {
- x[i] = x[i] + y[i];
- }
- }
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+#include "util.h"
+//--------------------------------------------------------------------------
+// multi-thread, register-blocked version
+//
+// Accumulates the product of two row-major lda x lda matrices into C
+// (C[j*lda + i] += sum over k of A[j*lda + k] * B[k*lda + i]).  C is only
+// ever added to, so the caller has to provide it zeroed.
+//
+//   coreid : index of the calling core, expected in 0 .. ncores-1
+//   ncores : number of cores that call this function together
+//   lda    : dimension of the square matrices
+//   A, B   : read-only inputs
+//   C      : output; each core updates rows
+//            coreid*(lda/ncores) .. (coreid+1)*(lda/ncores)-1 only
+//
+// The loops step i by 2, k by 4 and j by 4 with no tail handling, so lda
+// must be a multiple of 4 and lda/ncores must be a multiple of 4 as well;
+// otherwise the unrolled accesses run past the matrices or into rows that
+// belong to another core.
+//
+// Every core must call this function: it ends with barrier(ncores), so on
+// return all cores have finished their share of C.
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+  int i, j, k;
+
+  // Rows of C owned by this core: [row_begin, row_end).
+  const int rows_per_core = lda/ncores;
+  const int row_begin = coreid*rows_per_core;
+  const int row_end = row_begin + rows_per_core;
+
+  for ( i = 0; i < lda; i+=2 )
+  {
+    for ( k = 0; k < lda; k+=4 )
+    {
+      // 4x2 tile of B, reused for every row of A handled below.  The
+      // operands stay in data_t: storing them in int would truncate any
+      // non-integer input and force int<->floating conversions on every
+      // multiply-add.
+      const data_t d0 = B[k*lda + i];
+      const data_t c0 = B[k*lda + i + 1];
+      const data_t d1 = B[(k+1)*lda + i];
+      const data_t c1 = B[(k+1)*lda + i + 1];
+      const data_t d2 = B[(k+2)*lda + i];
+      const data_t c2 = B[(k+2)*lda + i + 1];
+      const data_t d3 = B[(k+3)*lda + i];
+      const data_t c3 = B[(k+3)*lda + i + 1];
+
+      // Four rows of A per iteration, each contributing to columns i and
+      // i+1 of the matching row of C.
+      for ( j = row_begin; j < row_end; j+=4 )
+      {
+        C[j*lda + i]         += A[j*lda + k] * d0 + A[j*lda + k + 1] * d1
+                              + A[j*lda + k + 2] * d2 + A[j*lda + k + 3] * d3;
+        C[j*lda + i + 1]     += A[j*lda + k] * c0 + A[j*lda + k + 1] * c1
+                              + A[j*lda + k + 2] * c2 + A[j*lda + k + 3] * c3;
+
+        C[(j+1)*lda + i]     += A[(j+1)*lda + k] * d0 + A[(j+1)*lda + k + 1] * d1
+                              + A[(j+1)*lda + k + 2] * d2 + A[(j+1)*lda + k + 3] * d3;
+        C[(j+1)*lda + i + 1] += A[(j+1)*lda + k] * c0 + A[(j+1)*lda + k + 1] * c1
+                              + A[(j+1)*lda + k + 2] * c2 + A[(j+1)*lda + k + 3] * c3;
+
+        C[(j+2)*lda + i]     += A[(j+2)*lda + k] * d0 + A[(j+2)*lda + k + 1] * d1
+                              + A[(j+2)*lda + k + 2] * d2 + A[(j+2)*lda + k + 3] * d3;
+        C[(j+2)*lda + i + 1] += A[(j+2)*lda + k] * c0 + A[(j+2)*lda + k + 1] * c1
+                              + A[(j+2)*lda + k + 2] * c2 + A[(j+2)*lda + k + 3] * c3;
+
+        C[(j+3)*lda + i]     += A[(j+3)*lda + k] * d0 + A[(j+3)*lda + k + 1] * d1
+                              + A[(j+3)*lda + k + 2] * d2 + A[(j+3)*lda + k + 3] * d3;
+        C[(j+3)*lda + i + 1] += A[(j+3)*lda + k] * c0 + A[(j+3)*lda + k + 1] * c1
+                              + A[(j+3)*lda + k + 2] * c2 + A[(j+3)*lda + k + 3] * c3;
+      }
+    }
+  }
+
+  // A and B are read-only and each core writes only its own rows of C, so
+  // nothing has to be synchronised between tiles.  One barrier before
+  // returning keeps the guarantee that all cores are done on return.
+  barrier(ncores);
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
-unsigned long _c = -rdcycle(), _i = -rdinstret(); \
-code; \
-_c += rdcycle(), _i += rdinstret(); \
-if (coreid == 0) \
-printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
-stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
-} while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- for ( i = 0; i < lda; i+=2 )
- {
- for (k = 0; k < lda; k+=4)
- {
- int d0 = B[k*lda + i];
- int c0 = B[k*lda + i + 1];
- int d1 = B[(k+1)*lda + i];
- int c1 = B[(k+1)*lda + i + 1];
- int d2 = B[(k+2)*lda + i];
- int c2 = B[(k+2)*lda + i + 1];
- int d3 = B[(k+3)*lda + i];
- int c3 = B[(k+3)*lda + i + 1];
-
- for ( j = coreid*(lda/ncores); j < (coreid+1)*(lda/ncores); j+=4)
- {
-
- int sum = A[j*lda + k] * d0;
- sum += A[j*lda + k + 1] * d1;
- sum += A[j*lda + k + 2] * d2;
- sum += A[j*lda + k + 3] * d3;
- C[j*lda +i] += sum;
-
- sum = A[j*lda + k] * c0;
- sum += A[j*lda + k + 1] * c1;
- sum += A[j*lda + k + 2] * c2;
- sum += A[j*lda + k + 3] * c3;
- C[j*lda + i + 1] += sum;
-
- sum = A[(j+1)*lda + k] * d0;
- sum += A[(j+1)*lda + k + 1] * d1;
- sum += A[(j+1)*lda + k + 2] * d2;
- sum += A[(j+1)*lda + k + 3] * d3;
- C[(j+1)*lda +i] += sum;
-
- sum = A[(j+1)*lda + k] * c0;
- sum += A[(j+1)*lda + k + 1] * c1;
- sum += A[(j+1)*lda + k + 2] * c2;
- sum += A[(j+1)*lda + k + 3] * c3;
- C[(j+1)*lda + i + 1] += sum;
-
- sum = A[(j+2)*lda + k] * d0;
- sum += A[(j+2)*lda + k + 1] * d1;
- sum += A[(j+2)*lda + k + 2] * d2;
- sum += A[(j+2)*lda + k + 3] * d3;
- C[(j+2)*lda +i] += sum;
-
- sum = A[(j+2)*lda + k] * c0;
- sum += A[(j+2)*lda + k + 1] * c1;
- sum += A[(j+2)*lda + k + 2] * c2;
- sum += A[(j+2)*lda + k + 3] * c3;
- C[(j+2)*lda + i + 1] += sum;
-
- sum = A[(j+3)*lda + k] * d0;
- sum += A[(j+3)*lda + k + 1] * d1;
- sum += A[(j+3)*lda + k + 2] * d2;
- sum += A[(j+3)*lda + k + 3] * d3;
- C[(j+3)*lda +i] += sum;
-
- sum = A[(j+3)*lda + k] * c0;
- sum += A[(j+3)*lda + k + 1] * c1;
- sum += A[(j+3)*lda + k + 2] * c2;
- sum += A[(j+3)*lda + k + 3] * c3;
- C[(j+3)*lda + i + 1] += sum;
-
- }
- barrier(ncores);
- }
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
-unsigned long _c = -rdcycle(), _i = -rdinstret(); \
-code; \
-_c += rdcycle(), _i += rdinstret(); \
-if (coreid == 0) \
-printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
-stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
-} while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- for ( i = 0; i < lda; i+=2 )
- {
- for (k = 0; k < lda; k+=4)
- {
- int d0 = B[k*lda + i];
- int c0 = B[k*lda + i + 1];
- int d1 = B[(k+1)*lda + i];
- int c1 = B[(k+1)*lda + i + 1];
- int d2 = B[(k+2)*lda + i];
- int c2 = B[(k+2)*lda + i + 1];
- int d3 = B[(k+3)*lda + i];
- int c3 = B[(k+3)*lda + i + 1];
-
- for ( j = coreid*(lda/ncores); j < (coreid+1)*(lda/ncores); j+=4)
- {
-
- int sum = A[j*lda + k] * d0;
- sum += A[j*lda + k + 1] * d1;
- sum += A[j*lda + k + 2] * d2;
- sum += A[j*lda + k + 3] * d3;
- C[j*lda +i] += sum;
-
- sum = A[j*lda + k] * c0;
- sum += A[j*lda + k + 1] * c1;
- sum += A[j*lda + k + 2] * c2;
- sum += A[j*lda + k + 3] * c3;
- C[j*lda + i + 1] += sum;
-
- sum = A[(j+1)*lda + k] * d0;
- sum += A[(j+1)*lda + k + 1] * d1;
- sum += A[(j+1)*lda + k + 2] * d2;
- sum += A[(j+1)*lda + k + 3] * d3;
- C[(j+1)*lda +i] += sum;
-
- sum = A[(j+1)*lda + k] * c0;
- sum += A[(j+1)*lda + k + 1] * c1;
- sum += A[(j+1)*lda + k + 2] * c2;
- sum += A[(j+1)*lda + k + 3] * c3;
- C[(j+1)*lda + i + 1] += sum;
-
- sum = A[(j+2)*lda + k] * d0;
- sum += A[(j+2)*lda + k + 1] * d1;
- sum += A[(j+2)*lda + k + 2] * d2;
- sum += A[(j+2)*lda + k + 3] * d3;
- C[(j+2)*lda +i] += sum;
-
- sum = A[(j+2)*lda + k] * c0;
- sum += A[(j+2)*lda + k + 1] * c1;
- sum += A[(j+2)*lda + k + 2] * c2;
- sum += A[(j+2)*lda + k + 3] * c3;
- C[(j+2)*lda + i + 1] += sum;
-
- sum = A[(j+3)*lda + k] * d0;
- sum += A[(j+3)*lda + k + 1] * d1;
- sum += A[(j+3)*lda + k + 2] * d2;
- sum += A[(j+3)*lda + k + 3] * d3;
- C[(j+3)*lda +i] += sum;
-
- sum = A[(j+3)*lda + k] * c0;
- sum += A[(j+3)*lda + k + 1] * c1;
- sum += A[(j+3)*lda + k + 2] * c2;
- sum += A[(j+3)*lda + k + 3] * c3;
- C[(j+3)*lda + i + 1] += sum;
-
- }
- barrier(nc);
- }
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
-
- size_t i;
-
- for (i = coreid*(n/2); i < (coreid+1)*(n/2); i++){
- x[i] = x[i] + y[i];
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+#include "util.h"
+//--------------------------------------------------------------------------
+// multi-threaded matmul: C = A * B for square, row-major lda x lda matrices
+//
+// Every core calls this function with its own coreid. Core `coreid` computes
+// the contiguous band of rows [coreid*lda/ncores, (coreid+1)*lda/ncores) of C.
+// The bands of all cores tile the lda rows exactly, also when lda is not a
+// multiple of ncores. Each element of the band is overwritten (not
+// accumulated into), so C does not have to be cleared beforehand.
+//
+//   coreid - index of the calling core; the row split expects 0 <= coreid < ncores
+//   ncores - number of cores sharing the work; also handed to barrier()
+//   lda    - matrix dimension (number of rows == number of columns)
+//   A, B   - input matrices, lda*lda elements each
+//   C      - output matrix, lda*lda elements
+//
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+
+  // ***************************** //
+  // **** ADD YOUR CODE HERE ***** //
+  // ***************************** //
+  //
+  // feel free to make a separate function for MI and MSI versions.
+
+//----------MSI--------------
+// (change the next line to a plain block-comment opener to disable this variant)
+///*
+   int i,j,k;
+   // first row owned by this core, and one past the last; computing both ends
+   // with the same formula hands leftover rows to the cores instead of
+   // dropping them
+   const int row_begin = coreid*lda/ncores;
+   const int row_end = (coreid+1)*lda/ncores;
+
+   // barrier() is not defined in this file (presumably util.h); called once by
+   // every core before the compute loop starts
+   barrier(ncores);
+   for(j = row_begin; j < row_end; j++) {
+      // main part: four adjacent columns per pass, so every A[j*lda+k] that is
+      // loaded feeds four running sums
+      for(i = 0; i+3 < lda; i+=4) {
+         data_t Cval0 = 0;
+         data_t Cval1 = 0;
+         data_t Cval2 = 0;
+         data_t Cval3 = 0;
+         for(k = 0; k < lda; k++) {
+            Cval0 += A[j*lda+k]*B[k*lda+i];
+            Cval1 += A[j*lda+k]*B[k*lda+i+1];
+            Cval2 += A[j*lda+k]*B[k*lda+i+2];
+            Cval3 += A[j*lda+k]*B[k*lda+i+3];
+         }
+         C[j*lda+i] = Cval0;
+         C[j*lda+i+1] = Cval1;
+         C[j*lda+i+2] = Cval2;
+         C[j*lda+i+3] = Cval3;
+      }
+      // tail: the lda%4 columns the unrolled loop cannot take without reading
+      // and writing past the end of the row
+      for(; i < lda; i++) {
+         data_t Cval = 0;
+         for(k = 0; k < lda; k++) {
+            Cval += A[j*lda+k]*B[k*lda+i];
+         }
+         C[j*lda+i] = Cval;
+      }
+   }
+//*/
+
+//------------------MI-------------------
+// Disabled alternative: core 0 walks k upwards, every other core walks k
+// downwards. NOTE(review): unlike the variant above it still expects lda to be
+// a multiple of both ncores and 4.
+/*
+   int i,j,k;
+   barrier(ncores);
+   for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) {
+      for(i = 0; i < lda; i+=4) {
+         data_t Cval0 = 0;
+         data_t Cval1 = 0;
+         data_t Cval2 = 0;
+         data_t Cval3 = 0;
+         if(coreid == 0) {
+            for(k = 0; k < lda; k++) {
+               Cval0 += A[j*lda+k]*B[k*lda+i];
+               Cval1 += A[j*lda+k]*B[k*lda+i+1];
+               Cval2 += A[j*lda+k]*B[k*lda+i+2];
+               Cval3 += A[j*lda+k]*B[k*lda+i+3];
+            }
+         } else {
+            for(k = lda-1; k >= 0; k--) {
+               Cval0 += A[j*lda+k]*B[k*lda+i];
+               Cval1 += A[j*lda+k]*B[k*lda+i+1];
+               Cval2 += A[j*lda+k]*B[k*lda+i+2];
+               Cval3 += A[j*lda+k]*B[k*lda+i+3];
+            }
+         }
+         C[j*lda+i] = Cval0;
+         C[j*lda+i+1] = Cval1;
+         C[j*lda+i+2] = Cval2;
+         C[j*lda+i+3] = Cval3;
+      }
+   }
+*/
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-// Multi-threaded matmul (MSI version): after a barrier, each core computes a
-// contiguous band of rows of C = A * B for lda x lda matrices, producing four
-// output columns per pass over k. C is overwritten, not accumulated into.
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-  int i, j, k;
-
-  // Band of rows owned by this core. The end is the next core's start rather
-  // than start + lda/ncores, so the last lda % ncores rows are no longer
-  // dropped when lda is not a multiple of ncores.
-  const int j_begin = coreid*lda/ncores;
-  const int j_end = (coreid+1)*lda/ncores;
-
-  // Columns [0, i_unrolled) are handled four at a time; the remaining
-  // lda % 4 columns are handled one at a time so the unrolled body never
-  // indexes past the end of a row.
-  const int i_unrolled = lda - lda%4;
-
-  barrier(ncores);
-  for(j = j_begin; j < j_end; j++) {
-    for(i = 0; i < i_unrolled; i+=4) {
-      data_t Cval0 = 0;
-      data_t Cval1 = 0;
-      data_t Cval2 = 0;
-      data_t Cval3 = 0;
-      for(k = 0; k < lda; k++) {
-        Cval0 += A[j*lda+k]*B[k*lda+i];
-        Cval1 += A[j*lda+k]*B[k*lda+i+1];
-        Cval2 += A[j*lda+k]*B[k*lda+i+2];
-        Cval3 += A[j*lda+k]*B[k*lda+i+3];
-      }
-      C[j*lda+i] = Cval0;
-      C[j*lda+i+1] = Cval1;
-      C[j*lda+i+2] = Cval2;
-      C[j*lda+i+3] = Cval3;
-    }
-    for(i = i_unrolled; i < lda; i++) {
-      data_t Cval = 0;
-      for(k = 0; k < lda; k++) {
-        Cval += A[j*lda+k]*B[k*lda+i];
-      }
-      C[j*lda+i] = Cval;
-    }
-  }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-// Per-thread entry point: records this thread's id and the core count in the
-// globals coreid/ncores, times one run of matmul() on the dataset arrays, and
-// checks the result against verify_data before calling exit(0).
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- // The barrier before stats() runs outside the measurement; the barrier
- // inside the stats() argument is part of the measured code.
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- // verifyMT only does work on core 0; the barrier after it is reached by
- // every thread before exit(0).
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-# Print the command-line help followed by $usageMsg, then exit.
-sub usage()
-{
-
-  my @helpLines = (
-    "\n",
-    " Usage: matmul_gendata.pl [options] \n",
-    "\n",
-    " Options:\n",
-    " --help print this message\n",
-    " --size size of input data [1000]\n",
-    " --seed random seed [1]\n",
-    "$usageMsg",
-  );
-
-  foreach my $line (@helpLines) {
-    print $line;
-  }
-
-  exit();
-}
-
-# Fill %opts with the defaults (help=0, size=1000, seed=1), then let
-# Getopt::Long override them from the command line. A parse failure or an
-# explicit --help both end up in usage().
-sub processCommandLine()
-{
-
-  @opts{ "help", "size", "seed" } = ( 0, 1000, 1 );
-
-  my $parsedOk = Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' );
-  if ( !$parsedOk ) {
-    usage();
-  }
-  if ( $opts{"help"} ) {
-    usage();
-  }
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-# Emit a C definition "static data_t <name>[ARRAY_SIZE] = { ... };" for the
-# array referenced by the second argument. Elements are formatted with %3d,
-# separated by ", ", and wrapped after every $numCols values.
-sub printArray
-{
-  my ( $arrayName, $arrayRef ) = @_;
-
-  my $numCols  = 20;
-  my $arrayLen = scalar(@{$arrayRef});
-  my $lastIdx  = $arrayLen - 1;
-
-  print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
-  print "{\n";
-
-  # An empty array still produces one (blank) row.
-  if ( $arrayLen == 0 ) {
-    print " ";
-    print "\n";
-  }
-
-  foreach my $index ( 0 .. $lastIdx ) {
-    # row prefix at the start of every row
-    if ( $index % $numCols == 0 ) {
-      print " ";
-    }
-
-    print sprintf("%3d",$arrayRef->[$index]);
-
-    # every element but the very last is followed by a separator
-    if ( $index != $lastIdx ) {
-      print ", ";
-    }
-
-    # close the row when it is full or when the data runs out
-    if ( $index % $numCols == $numCols-1 || $index == $lastIdx ) {
-      print "\n";
-    }
-  }
-
-  print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-# Multiply two matrices given as references to arrays of row arrays and
-# return a reference to the product in the same layout.
-sub mmult {
-  my ( $lhs, $rhs ) = @_;
-  my ( $lhsRows, $lhsCols ) = matdim($lhs);
-  my ( $rhsRows, $rhsCols ) = matdim($rhs);
-
-  my $product = [ ];
-
-  foreach my $row ( range($lhsRows) ) {
-    foreach my $col ( range($rhsCols) ) {
-      foreach my $inner ( range($lhsCols) ) {
-        $product->[$row][$col] += $lhs->[$row][$inner] * $rhs->[$inner][$col];
-      }
-    }
-  }
-
-  return $product;
-}
-
-# Return the list 0 .. n-1 for the count n passed as the first argument.
-sub range {
-  return 0 .. ($_[0] - 1);
-}
-
-
-# Return the number of elements in the array referenced by the argument;
-# die if the argument is not an ARRAY reference.
-sub veclen {
-  my ($ary_ref) = @_;
-  my $type = ref $ary_ref;
-  die "$type is bad array ref for $ary_ref" unless ( $type eq "ARRAY" );
-  return scalar(@{$ary_ref});
-}
-
-# Return (rows, cols) of a matrix stored as a reference to an array of row
-# arrays; the column count is taken from the first row.
-sub matdim {
-  my ($matrix) = @_;
-  my $rowCount = veclen($matrix);
-  my $colCount = veclen($matrix->[0]);
-  return ( $rowCount, $colCount );
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-# Generate two random size x size matrices with entries 0..3, multiply them,
-# and print the ARRAY_SIZE / DIM_SIZE defines followed by the two inputs and
-# the reference product as flat C arrays.
-sub main()
-{
-
-  processCommandLine();
-  srand($opts{"seed"});
-
-  my $dim = $opts{"size"};
-
-  # create random input arrays (the two matrices are filled alternately,
-  # element by element, so the random sequence is shared between them)
-  my $mat_values1;
-  my $mat_values2;
-  for ( my $row = 0; $row < $dim; $row++ ) {
-    for ( my $col = 0; $col < $dim; $col++ ) {
-      $mat_values1->[$row][$col] = int(rand(4));
-      $mat_values2->[$row][$col] = int(rand(4));
-    }
-  }
-
-  # perform matmul
-  my $mat_results = mmult( $mat_values1, $mat_values2 );
-
-  # flatten the 2d arrays row by row into 1d lists
-  my @values1;
-  my @values2;
-  my @results;
-  for ( my $row = 0; $row < $dim; $row++ ) {
-    for ( my $col = 0; $col < $dim; $col++ ) {
-      push( @values1, $mat_values1->[$row][$col] );
-      push( @values2, $mat_values2->[$row][$col] );
-      push( @results, $mat_results->[$row][$col] );
-    }
-  }
-
-  print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
-  print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
-  printArray( "input1_data", \@values1 );
-  printArray( "input2_data", \@values2 );
-  printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third array. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-// Two-level stringification: going through stringify() lets macros inside
-// the argument expand before stringify_1() turns it into a string literal.
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-// stats(code): run `code` once and report how long it took.
-// _c and _i start as the negated rdcycle()/rdinstret() readings and get the
-// readings taken after `code` added to them, leaving the elapsed counts
-// (unsigned arithmetic, so the negate-then-add wraps to the difference).
-// Only the thread with coreid == 0 prints. The line shows the stringified
-// code, the raw cycle count, cycles divided by DIM_SIZE^3 and cycles per
-// instruction; each ratio is printed as integer part plus one decimal digit
-// obtained from (10*value) % 10.
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-// Print `name` followed by the n elements of arr (each cast to long) and a
-// newline. Only the thread whose coreid is 0 prints; every other thread
-// returns without doing anything.
-void printArrayMT( char name[], int n, data_t arr[] )
-{
-  if (coreid == 0)
-  {
-    int idx = 0;
-
-    printf( " %10s :", name );
-    while ( idx < n )
-    {
-      printf( " %3ld ", (long) arr[idx] );
-      idx++;
-    }
-    printf( "\n" );
-  }
-}
-
-// Compare the first n elements of test against correct, on core 0 only
-// (threads with a non-zero coreid return immediately).
-// On the first mismatch the index and both values (cast to long) are printed
-// and the program exits with status -1; if everything matches the function
-// simply returns.
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
-  if (coreid != 0)
-    return;
-
-  size_t i;
-  for (i = 0; i < n; i++)
-  {
-    if (test[i] != correct[i])
-    {
-      // i is a size_t, so it is printed as a long with %ld rather than being
-      // handed to %d, which expects an int.
-      printf("FAILED test[%ld]= %3ld, correct[%ld]= %3ld\n",
-             (long)i, (long)test[i], (long)i, (long)correct[i]);
-      exit(-1);
-    }
-  }
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-// Naive single-thread matmul: only the thread with coreid 0 does any work.
-// The product of the lda x lda matrices A and B is accumulated into C
-// (C is added to, not overwritten).
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-  int col, row, inner;
-
-  if (coreid > 0)
-  {
-    return;
-  }
-
-  for ( col = 0; col < lda; col++ )
-  {
-    for ( row = 0; row < lda; row++ )
-    {
-      const int out = col + row*lda;
-
-      for ( inner = 0; inner < lda; inner++ )
-        C[out] += A[row*lda + inner] * B[inner*lda + col];
-    }
-  }
-}
-
-
-// Multi-threaded matmul (MI version): after a barrier, each core computes a
-// contiguous band of rows of C = A * B for lda x lda matrices, producing four
-// output columns per pass over k. Core 0 walks k upward and every other core
-// walks it downward. C is overwritten, not accumulated into.
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-  int i, j, k;
-
-  // Band of rows owned by this core. The end is the next core's start rather
-  // than start + lda/ncores, so the last lda % ncores rows are no longer
-  // dropped when lda is not a multiple of ncores.
-  const int j_begin = coreid*lda/ncores;
-  const int j_end = (coreid+1)*lda/ncores;
-
-  // Columns [0, i_unrolled) are handled four at a time; the remaining
-  // lda % 4 columns are handled one at a time so the unrolled body never
-  // indexes past the end of a row.
-  const int i_unrolled = lda - lda%4;
-
-  // Direction of the k walk for the leftover columns (same rule as below).
-  const int k_first = (coreid == 0) ? 0 : lda-1;
-  const int k_step = (coreid == 0) ? 1 : -1;
-
-  // The core count lives in the global ncores; `nc` is a parameter of
-  // thread_entry and is not declared in this function.
-  barrier(ncores);
-  for(j = j_begin; j < j_end; j++) {
-    for(i = 0; i < i_unrolled; i+=4) {
-      data_t Cval0 = 0;
-      data_t Cval1 = 0;
-      data_t Cval2 = 0;
-      data_t Cval3 = 0;
-      if(coreid == 0) {
-        for(k = 0; k < lda; k++) {
-          Cval0 += A[j*lda+k]*B[k*lda+i];
-          Cval1 += A[j*lda+k]*B[k*lda+i+1];
-          Cval2 += A[j*lda+k]*B[k*lda+i+2];
-          Cval3 += A[j*lda+k]*B[k*lda+i+3];
-        }
-      } else {
-        for(k = lda-1; k >= 0; k--) {
-          Cval0 += A[j*lda+k]*B[k*lda+i];
-          Cval1 += A[j*lda+k]*B[k*lda+i+1];
-          Cval2 += A[j*lda+k]*B[k*lda+i+2];
-          Cval3 += A[j*lda+k]*B[k*lda+i+3];
-        }
-      }
-      C[j*lda+i] = Cval0;
-      C[j*lda+i+1] = Cval1;
-      C[j*lda+i+2] = Cval2;
-      C[j*lda+i+3] = Cval3;
-    }
-    for(i = i_unrolled; i < lda; i++) {
-      data_t Cval = 0;
-      for(k = k_first; k >= 0 && k < lda; k += k_step) {
-        Cval += A[j*lda+k]*B[k*lda+i];
-      }
-      C[j*lda+i] = Cval;
-    }
-  }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-// Per-thread entry point: records this thread's id and the core count in the
-// globals coreid/ncores, times one run of matmul() on the dataset arrays, and
-// checks the result against verify_data before calling exit(0).
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- // The barrier before stats() runs outside the measurement; the barrier
- // inside the stats() argument is part of the measured code.
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- // verifyMT only does work on core 0; the barrier after it is reached by
- // every thread before exit(0).
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-// Two-level stringification: going through stringify() lets macros inside
-// the argument expand before stringify_1() turns it into a string literal.
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-// stats(code): run `code` once and report how long it took.
-// _c and _i start as the negated rdcycle()/rdinstret() readings and get the
-// readings taken after `code` added to them, leaving the elapsed counts
-// (unsigned arithmetic, so the negate-then-add wraps to the difference).
-// Only the thread with coreid == 0 prints. The line shows the stringified
-// code, the raw cycle count, cycles divided by DATA_SIZE and cycles per
-// instruction; each ratio is printed as integer part plus one decimal digit
-// obtained from (10*value) % 10.
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-// Print `name` followed by the n elements of arr (each cast to long) and a
-// newline. Only the thread whose coreid is 0 prints; every other thread
-// returns without doing anything.
-void printArrayMT( char name[], int n, data_t arr[] )
-{
-  if (coreid == 0)
-  {
-    int idx = 0;
-
-    printf( " %10s :", name );
-    while ( idx < n )
-    {
-      printf( " %4ld ", (long) arr[idx] );
-      idx++;
-    }
-    printf( "\n" );
-  }
-}
-
-// Compare the first n elements of test against correct, on core 0 only
-// (threads with a non-zero coreid return immediately).
-// On the first mismatch the index and both values (cast to long) are printed
-// and the program exits with status -1; if everything matches the function
-// simply returns.
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
-  if (coreid != 0)
-    return;
-
-  size_t i;
-  for (i = 0; i < n; i++)
-  {
-    if (test[i] != correct[i])
-    {
-      // i is a size_t, so it is printed as a long with %ld rather than being
-      // handed to %d, which expects an int.
-      printf("FAILED test[%ld]= %4ld, correct[%ld]= %4ld\n",
-             (long)i, (long) test[i], (long)i, (long)correct[i]);
-      exit(-1);
-    }
-  }
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-// In-place vvadd (x[i] = x[i] + y[i]) with the elements interleaved across
-// cores: this core starts at index coreid and advances by ncores each step.
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
-  size_t idx = coreid;
-
-  // interleave accesses
-  while (idx < n)
-  {
-    x[idx] = x[idx] + y[idx];
-    idx += ncores;
-  }
-}
-
-// Chunked in-place vvadd: each core adds y into x over one contiguous slice
-// of the n elements.
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
-  // Slice boundaries. The end of this core's slice is the start of the next
-  // core's, so the slices of cores 0 .. ncores-1 tile [0, n) exactly. The
-  // earlier bound (start + n/ncores) left the last n % ncores elements
-  // un-added whenever n was not a multiple of ncores.
-  const size_t begin = coreid*n/ncores;
-  const size_t end = (coreid+1)*n/ncores;
-  size_t i;
-
-  for (i = begin; i < end; i++) {
-    x[i] = x[i] + y[i];
-  }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-// Per-thread entry point: records this thread's id and the core count in the
-// globals coreid/ncores, then times and verifies vvadd() and vvadd_opt() in
-// turn on a copy of input1_data before calling exit(0).
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- // The barrier before stats() runs outside the measurement; the barrier
- // inside the stats() argument is part of the measured code.
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- // verifyMT only does work on core 0; the barrier after it is reached by
- // every thread before exit(0).
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-# Print the command-line help followed by the script description held in
-# $usageMsg, then terminate the script.
-sub usage()
-{
-  print "\n",
-        " Usage: vvadd_gendata.pl [options] \n",
-        "\n",
-        " Options:\n",
-        " --help print this message\n",
-        " --size size of input data [1000]\n",
-        " --seed random seed [1]\n",
-        "$usageMsg";
-
-  exit();
-}
-
-# Fill %opts with the default option values, then let Getopt::Long overwrite
-# them from the command line.  A command line that cannot be parsed, or an
-# explicit --help, prints the usage text (which exits).
-sub processCommandLine()
-{
-  # Defaults for help, size and seed, in that order.
-  @opts{ "help", "size", "seed" } = ( 0, 1000, 1 );
-
-  unless ( Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) ) {
-    usage();
-  }
-
-  $opts{"help"} and usage();
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-# Print the values referenced by the second argument as the initializer of a
-# C array whose name is the first argument, 20 values per row.
-sub printArray
-{
-  my ( $arrayName, $arrayRef ) = @_;
-
-  my $numCols  = 20;
-  my $arrayLen = scalar(@{$arrayRef});
-  my $lastIdx  = $arrayLen - 1;
-
-  print "static data_t ".$arrayName."[DATA_SIZE] = \n";
-  print "{\n";
-
-  # One pass per output row.  The body runs at least once, so an empty array
-  # still yields a single blank row, and a final short row picks up whatever
-  # does not fill a complete row.
-  my $rowStart = 0;
-  do {
-    my $rowEnd = $rowStart + $numCols;
-    $rowEnd = $arrayLen if $rowEnd > $arrayLen;
-
-    print " ";
-    for my $index ( $rowStart .. $rowEnd - 1 ) {
-      print sprintf("%3.2f",$arrayRef->[$index]);
-      # Every value except the very last one is followed by a separator.
-      print ", " if $index != $lastIdx;
-    }
-    print "\n";
-
-    $rowStart = $rowEnd;
-  } while ( $rowStart < $arrayLen );
-
-  print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-# Parse the options, seed the random number generator, draw "size" pairs of
-# integers in [0, 18] and print them, together with their element-wise sums,
-# as the DATA_SIZE define and the three arrays of the generated data set.
-sub main()
-{
-  processCommandLine();
-  srand($opts{"seed"});
-
-  my ( @values1, @values2, @sum );
-
-  foreach ( 1 .. $opts{"size"} ) {
-    # Draw order matters for reproducibility: first operand, then second.
-    my $value1 = int(rand(19));
-    my $value2 = int(rand(19));
-
-    push( @values1, $value1 );
-    push( @values2, $value2 );
-    push( @sum, $value1 + $value2 );
-  }
-
-  print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
-
-  printArray( "input1_data", \@values1 );
-  printArray( "input2_data", \@values2 );
-  printArray( "verify_data", \@sum );
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-/* stats(code): run `code` once and, on the thread whose coreid is 0, print
- * the stringified `code` followed by the elapsed cycle count, the cycles per
- * iteration (cycle count divided by DIM_SIZE three times) and the cycles per
- * instruction; both ratios are printed with one decimal digit.
- * The counters are sampled negated before `code` and added back afterwards,
- * so _c and _i end up holding the differences across `code`.
- * NOTE(review): rdcycle()/rdinstret() are defined outside this file;
- * presumably they read the cycle and retired-instruction counters -- confirm.
- * stringify() goes through stringify_1() so that macro arguments are expanded
- * before being turned into a string. */
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-// Print the label `name` followed by the first n elements of arr, each
-// converted to long, on a single line.  Only the thread whose coreid is 0
-// prints; every other thread does nothing.
-void printArrayMT( char name[], int n, data_t arr[] )
-{
-  if (coreid == 0)
-  {
-    printf( " %10s :", name );
-
-    for ( int idx = 0; idx < n; idx++ )
-    {
-      printf( " %3ld ", (long) arr[idx] );
-    }
-
-    printf( "\n" );
-  }
-}
-
-// Compare the first n elements of test against correct.  Only the thread
-// whose coreid is 0 performs the check; every other thread returns at once.
-// On the first mismatch the offending index and both values (converted to
-// long) are printed and the program exits with status -1.
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
-  if (coreid != 0)
-    return;
-
-  for (size_t i = 0; i < n; i++)
-  {
-    if (test[i] != correct[i])
-    {
-      // The index is a size_t; it is converted to long so that the argument
-      // matches its %ld conversion (passing a size_t for %d is undefined).
-      printf("FAILED test[%ld]= %3ld, correct[%ld]= %3ld\n",
-             (long)i, (long)test[i], (long)i, (long)correct[i]);
-      exit(-1);
-    }
-  }
-}
-
-// Element-wise difference: C[i] = A[i] - B[i] for i in [0, size).
-// Only the thread whose coreid is 0 does the work; other threads return
-// immediately.  (The body previously added, i.e. it was swapped with
-// matrix_add.)
-void matrix_sub(int size, data_t A[], data_t B[], data_t C[]) {
-  if (coreid != 0)
-    return;
-
-  for (int i = 0; i < size; i++) {
-    C[i] = A[i] - B[i];
-  }
-}
-
-// Element-wise sum: C[i] = A[i] + B[i] for i in [0, size).
-// Only the thread whose coreid is 0 does the work; other threads return
-// immediately.  (The body previously subtracted, i.e. it was swapped with
-// matrix_sub.)
-void matrix_add(int size, data_t A[], data_t B[], data_t C[]) {
-  if (coreid != 0)
-    return;
-
-  for (int i = 0; i < size; i++) {
-    C[i] = A[i] + B[i];
-  }
-}
-
-// Reference triple-loop product sC = sA * sB for dime x dime row-major
-// matrices.  It needs no scratch memory, so strassen_mult() falls back to it
-// whenever the recursive scheme cannot be applied.
-static void strassen_naive(int dime, const data_t sA[], const data_t sB[], data_t sC[])
-{
-  for (int row = 0; row < dime; row++) {
-    for (int col = 0; col < dime; col++) {
-      data_t acc = 0;
-      for (int k = 0; k < dime; k++)
-        acc += sA[row*dime + k] * sB[k*dime + col];
-      sC[row*dime + col] = acc;
-    }
-  }
-}
-
-// out[i] = x[i] + y[i] for i in [0, count).
-static void strassen_add(int count, const data_t x[], const data_t y[], data_t out[])
-{
-  for (int i = 0; i < count; i++)
-    out[i] = x[i] + y[i];
-}
-
-// out[i] = x[i] - y[i] for i in [0, count).
-static void strassen_sub(int count, const data_t x[], const data_t y[], data_t out[])
-{
-  for (int i = 0; i < count; i++)
-    out[i] = x[i] - y[i];
-}
-
-// Strassen matrix product: sC = sA * sB, all three dime x dime, row-major.
-// sC is overwritten (not accumulated into).  Only the thread whose coreid is
-// 0 does any work; other threads return immediately.
-//
-// Each level splits the operands into four quadrants, forms the seven
-// Strassen products recursively and recombines them into sC.  A 1x1 problem
-// is a plain scalar product.  When dime is odd, or when the scratch buffer
-// cannot be allocated, the classic triple loop is used instead, so the
-// function produces the product for every dime >= 1 and does nothing for
-// dime <= 0.
-void strassen_mult(int dime, const data_t sA[], const data_t sB[], data_t sC[]) {
-
-  if (coreid != 0)
-    return;
-
-  if (dime <= 0)
-    return;
-
-  if (dime == 1) {
-    sC[0] = sA[0] * sB[0];
-    return;
-  }
-
-  const int half = dime / 2;
-  const int sub_size = half * half;
-
-  // One allocation per level, carved into: 8 operand quadrants, 2 scratch
-  // operands (the quadrant sums/differences) and the 7 products.
-  enum { STRASSEN_BUFFERS = 17 };
-  data_t *work = NULL;
-  if (dime % 2 == 0)
-    work = malloc((size_t)STRASSEN_BUFFERS * (size_t)sub_size * sizeof(data_t));
-
-  if (work == NULL) {
-    // Odd dimension (quadrants would not tile the matrix) or out of memory.
-    strassen_naive(dime, sA, sB, sC);
-    return;
-  }
-
-  data_t *A_11 = work +  0*sub_size;
-  data_t *A_12 = work +  1*sub_size;
-  data_t *A_21 = work +  2*sub_size;
-  data_t *A_22 = work +  3*sub_size;
-  data_t *B_11 = work +  4*sub_size;
-  data_t *B_12 = work +  5*sub_size;
-  data_t *B_21 = work +  6*sub_size;
-  data_t *B_22 = work +  7*sub_size;
-  data_t *lhs  = work +  8*sub_size;
-  data_t *rhs  = work +  9*sub_size;
-  data_t *M_1  = work + 10*sub_size;
-  data_t *M_2  = work + 11*sub_size;
-  data_t *M_3  = work + 12*sub_size;
-  data_t *M_4  = work + 13*sub_size;
-  data_t *M_5  = work + 14*sub_size;
-  data_t *M_6  = work + 15*sub_size;
-  data_t *M_7  = work + 16*sub_size;
-
-  // Copy the four quadrants of each operand into contiguous half x half
-  // matrices.  All of sA and sB is read here, before sC is written.
-  for (int row = 0; row < half; row++) {
-    for (int col = 0; col < half; col++) {
-      const int q      = col + row*half;     // index inside a quadrant
-      const int top    = col + row*dime;     // same cell in the upper half
-      const int bottom = top + half*dime;    // same cell in the lower half
-
-      A_11[q] = sA[top];
-      A_12[q] = sA[top + half];
-      A_21[q] = sA[bottom];
-      A_22[q] = sA[bottom + half];
-
-      B_11[q] = sB[top];
-      B_12[q] = sB[top + half];
-      B_21[q] = sB[bottom];
-      B_22[q] = sB[bottom + half];
-    }
-  }
-
-  // M1 = (A11 + A22)(B11 + B22)
-  strassen_add(sub_size, A_11, A_22, lhs);
-  strassen_add(sub_size, B_11, B_22, rhs);
-  strassen_mult(half, lhs, rhs, M_1);
-
-  // M2 = (A21 + A22) B11
-  strassen_add(sub_size, A_21, A_22, lhs);
-  strassen_mult(half, lhs, B_11, M_2);
-
-  // M3 = A11 (B12 - B22)
-  strassen_sub(sub_size, B_12, B_22, rhs);
-  strassen_mult(half, A_11, rhs, M_3);
-
-  // M4 = A22 (B21 - B11)
-  strassen_sub(sub_size, B_21, B_11, rhs);
-  strassen_mult(half, A_22, rhs, M_4);
-
-  // M5 = (A11 + A12) B22
-  strassen_add(sub_size, A_11, A_12, lhs);
-  strassen_mult(half, lhs, B_22, M_5);
-
-  // M6 = (A21 - A11)(B11 + B12)
-  strassen_sub(sub_size, A_21, A_11, lhs);
-  strassen_add(sub_size, B_11, B_12, rhs);
-  strassen_mult(half, lhs, rhs, M_6);
-
-  // M7 = (A12 - A22)(B21 + B22)
-  strassen_sub(sub_size, A_12, A_22, lhs);
-  strassen_add(sub_size, B_21, B_22, rhs);
-  strassen_mult(half, lhs, rhs, M_7);
-
-  // Recombine straight into the quadrants of sC:
-  //   C11 = M1 + M4 - M5 + M7      C12 = M3 + M5
-  //   C21 = M2 + M4                C22 = M1 - M2 + M3 + M6
-  for (int row = 0; row < half; row++) {
-    for (int col = 0; col < half; col++) {
-      const int q      = col + row*half;
-      const int top    = col + row*dime;
-      const int bottom = top + half*dime;
-
-      sC[top]           = M_1[q] + M_4[q] - M_5[q] + M_7[q];
-      sC[top + half]    = M_3[q] + M_5[q];
-      sC[bottom]        = M_2[q] + M_4[q];
-      sC[bottom + half] = M_1[q] - M_2[q] + M_3[q] + M_6[q];
-    }
-  }
-
-  // Releases every quadrant, scratch operand and product of this level.
-  free(work);
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// Single-threaded reference product: accumulates A * B into C, where all
-// three are lda x lda matrices stored row-major.  C is added to, not
-// overwritten.  Only the thread whose coreid is 0 does the work.
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-  if (coreid > 0)
-    return;
-
-  for ( int col = 0; col < lda; col++ )
-  {
-    for ( int row = 0; row < lda; row++ )
-    {
-      const data_t *a_row = &A[row*lda];
-      data_t *c_elem = &C[col + row*lda];
-
-      // Accumulate directly into C, one term at a time, in increasing k.
-      for ( int k = 0; k < lda; k++ )
-        *c_elem += a_row[k] * B[k*lda + col];
-    }
-  }
-}
-
-
-
-// Benchmark entry point for the matrix product: hands the lda x lda operands
-// A and B and the result buffer C to strassen_mult().  Only the thread whose
-// coreid is 0 makes the call; every other thread returns without doing work.
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-  if (coreid == 0)
-  {
-    strassen_mult(lda, A, B, C);
-  }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// Entry point run by every thread: records cid/nc in coreid/ncores, times one matmul() call over the data set, verifies the result and exits.
-// The stored coreid is what the helpers above test to decide which thread does the work (the original note says each thread runs on its own core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static storage: one results buffer for the whole program rather than one per call (the original note says it is visible to both threads)
- static data_t results_data[ARRAY_SIZE];
-
- // The commented-out block below is a disabled run of matmul_naive with its own verification and a reset of results_data.
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Time the matmul under test; the barrier(nc) passed inside stats() is part of the timed code. NOTE(review): barrier() is defined elsewhere, presumably a rendezvous of nc threads -- confirm.
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // Compare the result with the reference data (verifyMT only checks on the thread whose coreid is 0 and exits with -1 on a mismatch)
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-void matrix_sub(int size, data_t A[], data_t B[], data_t C[]) {
- if (coreid != 0)
- return;
-
- for(int i = 0; i < size; i++){
- C[i] = A[i] + B[i];
- }
-}
-
-void matrix_add(int size, data_t A[], data_t B[], data_t C[]) {
- if (coreid != 0)
- return;
-
- for(int i = 0; i < size; i++){
- C[i] = A[i] - B[i];
- }
-}
-
-void strassen_mult(int dime, const data_t sA[], const data_t sB[], data_t sC[]) {
-
- if (coreid != 0)
- return;
-
- int height, width;
- int sub_size = dime*dime/4;
-
-// data_t A_11[sub_size], B_11[sub_size], C_11[sub_size],
-// A_12[sub_size], B_12[sub_size], C_12[sub_size],
-// A_21[sub_size], B_21[sub_size], C_21[sub_size],
-// A_22[sub_size], B_22[sub_size], C_22[sub_size];
-
- data_t *A_11 = malloc(sub_size*sizeof(data_t));
- data_t *A_12 = malloc(sub_size*sizeof(data_t));
- data_t *A_21 = malloc(sub_size*sizeof(data_t));
- data_t *A_22 = malloc(sub_size*sizeof(data_t));
- data_t *B_11 = malloc(sub_size*sizeof(data_t));
- data_t *B_12 = malloc(sub_size*sizeof(data_t));
- data_t *B_21 = malloc(sub_size*sizeof(data_t));
- data_t *B_22 = malloc(sub_size*sizeof(data_t));
-
- for(height=0; height < dime/2; height++) {
- for(width= 0; width < dime/2; width++) {
- A_11[width+(height*dime/2)] = sA[width + height*dime];
- B_11[width+(height*dime/2)] = sB[width + height*dime];
-
- A_12[width+(height*dime/2)] = sA[dime/2 + width + height*dime];
- B_12[width+(height*dime/2)] = sB[dime/2 + width + height*dime];
-
- A_21[width+(height*dime/2)] = sA[(dime*dime)/2 + width + height*dime];
- B_21[width+(height*dime/2)] = sB[(dime*dime)/2 + width + height*dime];
-
- A_22[width+(height*dime/2)] = sA[(dime*dime)/2 + dime/2 + width + height*dime];
- B_22[width+(height*dime/2)] = sB[(dime*dime)/2 + dime/2 + width + height*dime];
- }
- }
-
-// data_t H_1[sub_size], H_2[sub_size], H_3[sub_size], H_4[sub_size], H_5[sub_size],
-// H_6[sub_size], H_7[sub_size], H_8[sub_size], H_9[sub_size], H_10[sub_size],
-// H_11[sub_size], H_12[sub_size], H_13[sub_size], H_14[sub_size],
-// H_15[sub_size], H_16[sub_size], H_17[sub_size], H_18[sub_size];
-
- data_t *H_1 = malloc(sub_size*sizeof(data_t));
- data_t *H_2 = malloc(sub_size*sizeof(data_t));
- data_t *H_3 = malloc(sub_size*sizeof(data_t));
- data_t *H_4 = malloc(sub_size*sizeof(data_t));
- data_t *H_5 = malloc(sub_size*sizeof(data_t));
- data_t *H_6 = malloc(sub_size*sizeof(data_t));
- data_t *H_7 = malloc(sub_size*sizeof(data_t));
- data_t *H_8 = malloc(sub_size*sizeof(data_t));
- data_t *H_9 = malloc(sub_size*sizeof(data_t));
- data_t *H_10 = malloc(sub_size*sizeof(data_t));
-
- matrix_add(sub_size, A_11, A_22, H_1); //Helper1
- matrix_add(sub_size, B_11, B_22, H_2); //Helper2
- matrix_add(sub_size, A_21, A_22, H_3); //Helper3
- matrix_sub(sub_size, B_12, B_22, H_4); //Helper4
- matrix_sub(sub_size, B_21, B_11, H_5); //Helper5
- matrix_add(sub_size, A_11, A_12, H_6); //Helper6
- matrix_sub(sub_size, A_21, A_11, H_7); //Helper7
- matrix_add(sub_size, B_11, B_12, H_8); //Helper8
- matrix_sub(sub_size, A_12, A_22, H_9); //Helper9
- matrix_add(sub_size, B_21, B_22, H_10); //Helper10
-
- free(A_12);
- free(A_21);
- free(B_12);
- free(B_21);
-
- A_12 = NULL;
- A_21 = NULL;
- B_12 = NULL;
- B_21 = NULL;
-
-// data_t M_1[sub_size], M_2[sub_size], M_3[sub_size], M_4[sub_size],
-// M_5[sub_size], M_6[sub_size], M_7[sub_size];
-
- data_t *M_1 = malloc(sub_size*sizeof(data_t));
- data_t *M_2 = malloc(sub_size*sizeof(data_t));
- data_t *M_3 = malloc(sub_size*sizeof(data_t));
- data_t *M_4 = malloc(sub_size*sizeof(data_t));
- data_t *M_5 = malloc(sub_size*sizeof(data_t));
- data_t *M_6 = malloc(sub_size*sizeof(data_t));
- data_t *M_7 = malloc(sub_size*sizeof(data_t));
-
- if (sub_size == 1) {
- M_1[0] = H_1[0]*H_2[0];
- M_2[0] = H_3[0]*B_11[0];
- M_3[0] = A_11[0]*H_4[0];
- M_4[0] = A_22[0]*H_5[0];
- M_5[0] = H_6[0]*B_22[0];
- M_6[0] = H_7[0]*H_8[0];
- M_7[0] = H_9[0]*H_10[0];
- } else {
- strassen_mult(dime/2, H_1, H_2, M_1);
- strassen_mult(dime/2, H_3, B_11, M_2);
- strassen_mult(dime/2, A_11, H_4, M_3);
- strassen_mult(dime/2, A_22, H_5, M_4);
- strassen_mult(dime/2, H_6, B_22, M_5);
- strassen_mult(dime/2, H_7, H_8, M_6);
- strassen_mult(dime/2, H_9, H_10, M_7);
- }
-
- free(A_11);
- free(A_22);
- free(B_11);
- free(B_22);
-
- A_11 = NULL;
- A_22 = NULL;
- B_11 = NULL;
- B_22 = NULL;
-
- free(H_1);
- free(H_2);
- free(H_3);
- free(H_4);
- free(H_5);
- free(H_6);
- free(H_7);
- free(H_8);
- free(H_9);
- free(H_10);
-
- H_1 = NULL;
- H_2 = NULL;
- H_3 = NULL;
- H_4 = NULL;
- H_5 = NULL;
- H_6 = NULL;
- H_7 = NULL;
- H_8 = NULL;
- H_9 = NULL;
- H_10 = NULL;
-
- data_t *H_11 = malloc(sub_size*sizeof(data_t));
- data_t *H_12 = malloc(sub_size*sizeof(data_t));
- data_t *H_13 = malloc(sub_size*sizeof(data_t));
- data_t *H_14 = malloc(sub_size*sizeof(data_t));
-
- data_t *C_11 = malloc(sub_size*sizeof(data_t));
- data_t *C_12 = malloc(sub_size*sizeof(data_t));
- data_t *C_21 = malloc(sub_size*sizeof(data_t));
- data_t *C_22 = malloc(sub_size*sizeof(data_t));
-
- matrix_add(sub_size, M_1, M_4, H_11);
- matrix_add(sub_size, M_5, M_7, H_12);
- matrix_sub(sub_size, H_11, H_12, C_11);
-
- matrix_add(sub_size, M_3, M_5, C_12);
-
- matrix_add(sub_size, M_2, M_4, C_21);
-
- matrix_sub(sub_size, M_1, M_2, H_13);
- matrix_add(sub_size, M_3, M_6, H_14);
- matrix_add(sub_size, H_13, H_14, C_22);
-
- free(H_11);
- free(H_12);
- free(H_13);
- free(H_14);
-
- H_11 = NULL;
- H_12 = NULL;
- H_13 = NULL;
- H_14 = NULL;
-
-
- for(height=0; height < dime/2; height++) {
- for(width= 0; width < dime/2; width++) {
- sC[width + height*dime] = C_11[width+(height*dime/2)];
- sC[dime/2 + width + height*dime] = C_12[width+(height*dime/2)];
- sC[(dime*dime)/2 + width + height*dime] = C_21[width+(height*dime/2)];
- sC[(dime*dime)/2 + dime/2 + width + height*dime] = C_22[width+(height*dime/2)];
- }
- }
-
- free(C_11);
- free(C_12);
- free(C_21);
- free(C_22);
-
- C_11 = NULL;
- C_12 = NULL;
- C_21 = NULL;
- C_22 = NULL;
-
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
- if (coreid > 0)
- return;
-
- strassen_mult(lda, A, B, C);
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- for (i = (n/ncores)*coreid; i < (n/ncores)*(coreid+1); i++)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+//--------------------------------------------------------------------------
+// multi-thread, cache-blocked version
+//
+// Accumulates the product of two lda x lda row-major matrices into C:
+//
+//   C[j*lda + i] += sum over k of A[j*lda + k] * B[k*lda + i]
+//
+// C is read before it is written, so the caller must initialize it
+// (e.g. to zero) before the call.
+//
+// Work split: the rows of C are cut into bands of `bsize` rows. The core
+// with index `coreid` handles bands coreid, coreid + ncores,
+// coreid + 2*ncores, ... Each core therefore writes only its own rows of
+// C. No synchronization is performed inside this function.
+//
+//   coreid : index of the calling core, expected in [0, ncores)
+//   ncores : number of cores sharing the work, must be > 0
+//   lda    : dimension of the square matrices (any non-negative value;
+//            it no longer has to be a multiple of 8)
+//   A, B   : input matrices, lda*lda elements each
+//   C      : output matrix, lda*lda elements, updated in place
+//
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+  const int bsize = 16;  // edge length of one tile of C, in elements
+
+  // A non-positive core count would keep jj from advancing, and a negative
+  // core index would start below row 0; there is no valid work in either case.
+  if (ncores <= 0 || coreid < 0)
+    return;
+
+  for (int jj = bsize*coreid; jj < lda; jj += bsize*ncores) {
+    // Clamp the tile to the matrix edge.
+    const int j_end = (lda - jj < bsize) ? lda : jj + bsize;
+
+    for (int ii = 0; ii < lda; ii += bsize) {
+      const int i_end = (lda - ii < bsize) ? lda : ii + bsize;
+
+      for (int j = jj; j < j_end; j++) {
+        const data_t *a_row = A + j*lda;
+        data_t *c_row = C + j*lda;
+        int i = ii;
+
+        // Fast path: 8 adjacent elements of one row of C are kept in
+        // scalar accumulators so each A[j][k] is loaded once per 8 results.
+        // Only taken while a full group of 8 columns remains in the tile.
+        for (; i_end - i >= 8; i += 8) {
+          data_t c1 = c_row[i];
+          data_t c2 = c_row[i + 1];
+          data_t c3 = c_row[i + 2];
+          data_t c4 = c_row[i + 3];
+          data_t c5 = c_row[i + 4];
+          data_t c6 = c_row[i + 5];
+          data_t c7 = c_row[i + 6];
+          data_t c8 = c_row[i + 7];
+
+          // k runs 0..lda-1 in order, i.e. the same accumulation order as
+          // the former k/x loop pair, without requiring lda % 4 == 0.
+          for (int k = 0; k < lda; k++) {
+            const data_t a = a_row[k];
+            const data_t *b = B + k*lda + i;
+            c1 += a * b[0];
+            c2 += a * b[1];
+            c3 += a * b[2];
+            c4 += a * b[3];
+            c5 += a * b[4];
+            c6 += a * b[5];
+            c7 += a * b[6];
+            c8 += a * b[7];
+          }
+
+          c_row[i]     = c1;
+          c_row[i + 1] = c2;
+          c_row[i + 2] = c3;
+          c_row[i + 3] = c4;
+          c_row[i + 4] = c5;
+          c_row[i + 5] = c6;
+          c_row[i + 6] = c7;
+          c_row[i + 7] = c8;
+        }
+
+        // Tail: the fewer-than-8 columns left when the tile width is not a
+        // multiple of 8. Without this the unrolled path would read B and
+        // write C beyond column lda-1.
+        for (; i < i_end; i++) {
+          data_t c = c_row[i];
+          for (int k = 0; k < lda; k++)
+            c += a_row[k] * B[k*lda + i];
+          c_row[i] = c;
+        }
+      }
+    }
+  }
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
- int i, j, k, ii, jj, bsize;
- bsize = 16;
- for ( jj = bsize*coreid; jj < lda; jj += bsize*ncores) {
- for ( ii = 0; ii < lda; ii += bsize) {
- for ( j = jj; j < lda && j < jj + bsize; j++) {
- for ( i = ii; i < lda && i < ii + bsize; i += 8) {
- data_t c1 = C[i + j*lda];
- data_t c2 = C[i + j*lda + 1];
- data_t c3 = C[i + j*lda + 2];
- data_t c4 = C[i + j*lda + 3];
- data_t c5 = C[i + j*lda + 4];
- data_t c6 = C[i + j*lda + 5];
- data_t c7 = C[i + j*lda + 6];
- data_t c8 = C[i + j*lda + 7];
- for ( k = 0; k < lda; k+=4 ) {
- for (int x = 0; x < 4; x++) {
- data_t a = A[j*lda + k+x];
- data_t b1 = B[(k+x)*lda + i];
- data_t b2 = B[(k+x)*lda + i + 1];
- data_t b3 = B[(k+x)*lda + i + 2];
- data_t b4 = B[(k+x)*lda + i + 3];
- data_t b5 = B[(k+x)*lda + i + 4];
- data_t b6 = B[(k+x)*lda + i + 5];
- data_t b7 = B[(k+x)*lda + i + 6];
- data_t b8 = B[(k+x)*lda + i + 7];
- c1 += a * b1;
- c2 += a * b2;
- c3 += a * b3;
- c4 += a * b4;
- c5 += a * b5;
- c6 += a * b6;
- c7 += a * b7;
- c8 += a * b8;
- }
- }
- C[i + j*lda] = c1;
- C[i + j*lda + 1] = c2;
- C[i + j*lda + 2] = c3;
- C[i + j*lda + 3] = c4;
- C[i + j*lda + 4] = c5;
- C[i + j*lda + 5] = c6;
- C[i + j*lda + 6] = c7;
- C[i + j*lda + 7] = c8;
- }
- }
- }
- }
-
-}
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-// Two-level stringification: the argument is macro-expanded before it is quoted.
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-
-// Runs `code` between two samples of rdcycle() and rdinstret(). Core 0 then
-// prints the quoted code, the cycle delta, cycles per iteration (the delta
-// divided by DIM_SIZE^3) and cycles per instruction, the last two with one
-// decimal digit each.
-#define stats(code) do { \
-    unsigned long _c = -rdcycle(); \
-    unsigned long _i = -rdinstret(); \
-    code; \
-    _c += rdcycle(); \
-    _i += rdinstret(); \
-    if (coreid == 0) { \
-      printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
-             stringify(code), \
-             _c, \
-             _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, \
-             10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, \
-             _c/_i, \
-             10*_c/_i%10); \
-    } \
-  } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-// Prints `name` followed by the first n elements of arr (each converted to
-// long) on a single line. Only core 0 prints; other cores return at once.
-void printArrayMT( char name[], int n, data_t arr[] )
-{
-   if (coreid == 0)
-   {
-      int idx = 0;
-
-      printf( " %10s :", name );
-      while ( idx < n )
-      {
-         printf( " %3ld ", (long) arr[idx] );
-         idx++;
-      }
-      printf( "\n" );
-   }
-}
-
-// Compares the first n elements of test against correct. Only core 0 checks;
-// every other core returns immediately. On the first mismatch the index and
-// both values (converted to long) are printed and the program exits with
-// status -1.
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
-   if (coreid != 0)
-      return;
-
-   for (size_t i = 0; i < n; i++)
-   {
-      if (test[i] != correct[i])
-      {
-         // i is a size_t; convert it explicitly so the argument type matches
-         // the conversion specifier (a size_t passed to "%d" is a mismatch).
-         printf("FAILED test[%ld]= %3ld, correct[%ld]= %3ld\n",
-                (long)i, (long)test[i], (long)i, (long)correct[i]);
-         exit(-1);
-      }
-   }
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// Single-thread, naive version: only core 0 does any work, every other core
-// returns immediately. For each pair (i, j) it adds the sum over k of
-// A[j*lda + k] * B[k*lda + i] into C[i + j*lda]; C is accumulated into, not
-// overwritten.
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-   if (coreid == 0)
-   {
-      for ( int col = 0; col < lda; col++ )
-      {
-         for ( int row = 0; row < lda; row++ )
-         {
-            const int out = col + row*lda;
-
-            for ( int k = 0; k < lda; k++ )
-               C[out] += A[row*lda + k] * B[k*lda + col];
-         }
-      }
-   }
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- // Blocked multiply-accumulate: each C[i + j*lda] gains the sum over k of
- // A[j*lda + k] * B[k*lda + i]. Blocks of j (bsize wide) are interleaved
- // across cores; C is read before it is updated, so it is accumulated into.
- // NOTE(review): the 4x2 tile, the k step of 8 and the modulo wrap of ii have
- // no remainder handling, so this appears to assume lda is a multiple of
- // bsize (16) -- confirm against callers.
- int i, j, k, ii, jj, bsize, start;
- bsize = 16; // edge length of one block
- start = bsize*coreid; // first block offset handled by this core
- for ( jj = start; jj < lda; jj += bsize*ncores) { // j-blocks, interleaved across cores
- int first = 1; // lets the ii loop run once before the wrap-around test applies
- for ( ii = start; ii !=start || first; ii=(bsize+ii) % lda) { // i-blocks, starting at this core's offset and wrapping modulo lda
- first = 0;
- for ( j = jj; j < lda && j < jj + bsize; j+=4) { // four j values per step
- for ( i = ii; i < lda && i < ii + bsize; i+=2) { // two i values per step
- data_t c1 = C[i + j*lda]; // c1..c8: local copy of the 4x2 tile of C
- data_t c2 = C[i + j*lda + 1];
- data_t c3 = C[i + (j+1)*lda];
- data_t c4 = C[i + (j+1)*lda + 1];
- data_t c5 = C[i + (j+2)*lda];
- data_t c6 = C[i + (j+2)*lda + 1];
- data_t c7 = C[i + (j+3)*lda];
- data_t c8 = C[i + (j+3)*lda + 1];
- for ( k = 0; k < lda; k+=8){ // k advances in groups of 8 ...
- for (int x = 0; x < 8; x++) { // ... and x walks the 8 entries of the group
- data_t a = A[j*lda + k+x]; // one A element for each of the four j values
- data_t a1 = A[(j+1)*lda +k+x];
- data_t a2 = A[(j+2)*lda +k+x];
- data_t a3 = A[(j+3)*lda +k+x];
- data_t b1 = B[(k+x)*lda + i]; // one B element for each of the two i values
- data_t b2 = B[(k+x)*lda + i + 1];
- c1 += a * b1;
- c2 += a * b2;
- c3 += a1* b1;
- c4 += a1* b2;
- c5 += a2* b1;
- c6 += a2* b2;
- c7 += a3* b1;
- c8 += a3* b2;
- }
- }
- C[i + j*lda] = c1; // write the finished tile back to C
- C[i + j*lda + 1] = c2;
- C[i + (j+1)*lda] = c3;
- C[i + (j+1)*lda + 1] = c4;
- C[i + (j+2)*lda] = c5;
- C[i + (j+2)*lda + 1] = c6;
- C[i + (j+3)*lda] = c7;
- C[i + (j+3)*lda + 1] = c8;
- }
- }
- }
- }
-}
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-// Per-core entry point: records the core id and core count, times and verifies
-// matmul_naive, clears the shared result buffer, then times and verifies
-// matmul. Never returns; it ends with exit(0).
-void thread_entry(int cid, int nc)
-{
-   // static storage: a single buffer shared by every core running this function
-   static data_t results_data[ARRAY_SIZE];
-
-   coreid = cid;
-   ncores = nc;
-
-   // Baseline: the provided naive matmul, timed together with its closing barrier
-   barrier(nc);
-   stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-   verifyMT(ARRAY_SIZE, results_data, verify_data);
-
-   // Both kernels accumulate into the output, so core 0 zeroes it between runs
-   if (coreid == 0)
-   {
-      for (size_t idx = 0; idx < ARRAY_SIZE; idx++)
-         results_data[idx] = 0;
-   }
-   barrier(nc);
-
-   // Second run: the blocked matmul
-   barrier(nc);
-   stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
-   printArrayMT("results:", ARRAY_SIZE, results_data);
-   printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
-   verifyMT(ARRAY_SIZE, results_data, verify_data);
-   barrier(nc);
-
-   exit(0);
-}
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-# Print the option summary followed by the descriptive text held in
-# $usageMsg, then terminate the script.
-sub usage()
-{
-  print
-    "\n",
-    " Usage: matmul_gendata.pl [options] \n",
-    "\n",
-    " Options:\n",
-    " --help print this message\n",
-    " --size size of input data [1000]\n",
-    " --seed random seed [1]\n",
-    $usageMsg;
-
-  exit();
-}
-
-# Load the default option values into %opts, let Getopt::Long overwrite them
-# from the command line, and show the usage text when parsing fails or when
-# --help was given.
-sub processCommandLine()
-{
-  @opts{ "help", "size", "seed" } = ( 0, 1000, 1 );
-
-  unless ( Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) ) {
-    usage();
-  }
-
-  if ( $opts{"help"} ) {
-    usage();
-  }
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-# Emit a C definition "static data_t <name>[ARRAY_SIZE] = { ... };" for the
-# array referenced by the second argument. Values are printed with "%3d",
-# twenty per row, separated by ", " with no separator after the final value.
-# An empty array still produces one (empty) row.
-sub printArray
-{
-  my ( $arrayName, $arrayRef ) = @_;
-
-  my $numCols = 20;
-  my $arrayLen = scalar(@{$arrayRef});
-  my $lastIndex = $arrayLen - 1;
-
-  print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
-  print "{\n";
-
-  my $rowStart = 0;
-  do {
-    # Clamp the row to the end of the array so the last row may be short.
-    my $rowEnd = $rowStart + $numCols - 1;
-    $rowEnd = $lastIndex if ( $rowEnd > $lastIndex );
-
-    print " ";
-    foreach my $index ( $rowStart .. $rowEnd ) {
-      print sprintf("%3d",$arrayRef->[$index]);
-      print ", " if ( $index != $lastIndex );
-    }
-    print "\n";
-
-    $rowStart += $numCols;
-  } while ( $rowStart < $arrayLen );
-
-  print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-# Multiply two matrices given as references to arrays of row references and
-# return a reference to the product. The row count of the second matrix is
-# read but never compared with the column count of the first.
-sub mmult {
-  my ( $lhs, $rhs ) = @_;
-  my ( $lhsRows, $lhsCols ) = matdim($lhs);
-  my ( $rhsRows, $rhsCols ) = matdim($rhs);
-
-  my $product = [ ];
-
-  foreach my $row ( range($lhsRows) ) {
-    foreach my $col ( range($rhsCols) ) {
-      foreach my $inner ( range($lhsCols) ) {
-        $product->[$row][$col] += $lhs->[$row][$inner] * $rhs->[$inner][$col];
-      }
-    }
-  }
-
-  return $product;
-}
-
-# Return the list 0 .. count-1 (empty when count is 0).
-sub range {
-  my ($count) = @_;
-  return 0 .. ( $count - 1 );
-}
-
-
-# Return the number of elements in the array referenced by the argument;
-# die when the argument is not an array reference.
-sub veclen {
-  my ($ary_ref) = @_;
-  my $type = ref $ary_ref;
-  die "$type is bad array ref for $ary_ref" unless ( $type eq "ARRAY" );
-  return scalar( @{$ary_ref} );
-}
-
-# Return (rows, cols) for a matrix stored as a reference to an array of row
-# references; the column count is taken from the first row only.
-sub matdim {
-  my ($matrix) = @_;
-  return ( veclen($matrix), veclen( $matrix->[0] ) );
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-# Generate two size x size matrices of random integers in 0..3, multiply them,
-# and print the size macros plus the three flattened arrays as C source.
-sub main()
-{
-  processCommandLine();
-  srand($opts{"seed"});
-
-  my $size = $opts{"size"};
-
-  # Create the random inputs. The two matrices are filled alternately, element
-  # by element, so the order of the rand() draws is part of the output.
-  my ( $mat_values1, $mat_values2 );
-  foreach my $i ( 0 .. $size - 1 ) {
-    foreach my $j ( 0 .. $size - 1 ) {
-      $mat_values1->[$i][$j] = int(rand(4));
-      $mat_values2->[$i][$j] = int(rand(4));
-    }
-  }
-
-  # Reference result
-  my $mat_results = mmult( $mat_values1, $mat_values2 );
-
-  # Flatten each matrix row by row into a one-dimensional list
-  my ( @values1, @values2, @results );
-  foreach my $i ( 0 .. $size - 1 ) {
-    push( @values1, @{ $mat_values1->[$i] }[ 0 .. $size - 1 ] );
-    push( @values2, @{ $mat_values2->[$i] }[ 0 .. $size - 1 ] );
-    push( @results, @{ $mat_results->[$i] }[ 0 .. $size - 1 ] );
-  }
-
-  print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
-  print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
-  printArray( "input1_data", \@values1 );
-  printArray( "input2_data", \@values2 );
-  printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-// Two-level stringification: the argument is macro-expanded before it is quoted.
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-
-// Runs `code` between two samples of rdcycle() and rdinstret(). Core 0 then
-// prints the quoted code, the cycle delta, cycles per iteration (the delta
-// divided by DIM_SIZE^3) and cycles per instruction, the last two with one
-// decimal digit each.
-#define stats(code) do { \
-    unsigned long _c = -rdcycle(); \
-    unsigned long _i = -rdinstret(); \
-    code; \
-    _c += rdcycle(); \
-    _i += rdinstret(); \
-    if (coreid == 0) { \
-      printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
-             stringify(code), \
-             _c, \
-             _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, \
-             10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, \
-             _c/_i, \
-             10*_c/_i%10); \
-    } \
-  } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-// Prints `name` followed by the first n elements of arr (each converted to
-// long) on a single line. Only core 0 prints; other cores return at once.
-void printArrayMT( char name[], int n, data_t arr[] )
-{
-   if (coreid == 0)
-   {
-      int idx = 0;
-
-      printf( " %10s :", name );
-      while ( idx < n )
-      {
-         printf( " %3ld ", (long) arr[idx] );
-         idx++;
-      }
-      printf( "\n" );
-   }
-}
-
-// Compares the first n elements of test against correct. Only core 0 checks;
-// every other core returns immediately. On the first mismatch the index and
-// both values (converted to long) are printed and the program exits with
-// status -1.
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
-   if (coreid != 0)
-      return;
-
-   for (size_t i = 0; i < n; i++)
-   {
-      if (test[i] != correct[i])
-      {
-         // i is a size_t; convert it explicitly so the argument type matches
-         // the conversion specifier (a size_t passed to "%d" is a mismatch).
-         printf("FAILED test[%ld]= %3ld, correct[%ld]= %3ld\n",
-                (long)i, (long)test[i], (long)i, (long)correct[i]);
-         exit(-1);
-      }
-   }
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// Single-thread, naive version: only core 0 does any work, every other core
-// returns immediately. For each pair (i, j) it adds the sum over k of
-// A[j*lda + k] * B[k*lda + i] into C[i + j*lda]; C is accumulated into, not
-// overwritten.
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-   if (coreid == 0)
-   {
-      for ( int col = 0; col < lda; col++ )
-      {
-         for ( int row = 0; row < lda; row++ )
-         {
-            const int out = col + row*lda;
-
-            for ( int k = 0; k < lda; k++ )
-               C[out] += A[row*lda + k] * B[k*lda + col];
-         }
-      }
-   }
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- // Blocked multiply-accumulate: each C[i + j*lda] gains the sum over k of
- // A[j*lda + k] * B[k*lda + i]. Blocks of j (bsize wide) are interleaved
- // across cores; C is read before it is updated, so it is accumulated into.
- // NOTE(review): the 4x2 tile, the k step of 8 and the modulo wrap of ii have
- // no remainder handling, so this appears to assume lda is a multiple of
- // bsize (16) -- confirm against callers.
- int i, j, k, ii, jj, bsize, start;
- bsize = 16; // edge length of one block
- start = bsize*coreid; // first block offset handled by this core
- for ( jj = start; jj < lda; jj += bsize*ncores) { // j-blocks, interleaved across cores
- int first = 1; // lets the ii loop run once before the wrap-around test applies
- for ( ii = start; ii !=start || first; ii=(bsize+ii) % lda) { // i-blocks, starting at this core's offset and wrapping modulo lda
- first = 0;
- for ( j = jj; j < lda && j < jj + bsize; j+=4) { // four j values per step
- for ( i = ii; i < lda && i < ii + bsize; i+=2) { // two i values per step
- data_t c1 = C[i + j*lda]; // c1..c8: local copy of the 4x2 tile of C
- data_t c2 = C[i + j*lda + 1];
- data_t c3 = C[i + (j+1)*lda];
- data_t c4 = C[i + (j+1)*lda + 1];
- data_t c5 = C[i + (j+2)*lda];
- data_t c6 = C[i + (j+2)*lda + 1];
- data_t c7 = C[i + (j+3)*lda];
- data_t c8 = C[i + (j+3)*lda + 1];
- for ( k = 0; k < lda; k+=8){ // k advances in groups of 8 ...
- for (int x = 0; x < 8; x++) { // ... and x walks the 8 entries of the group
- data_t a = A[j*lda + k+x]; // one A element for each of the four j values
- data_t a1 = A[(j+1)*lda +k+x];
- data_t a2 = A[(j+2)*lda +k+x];
- data_t a3 = A[(j+3)*lda +k+x];
- data_t b1 = B[(k+x)*lda + i]; // one B element for each of the two i values
- data_t b2 = B[(k+x)*lda + i + 1];
- c1 += a * b1;
- c2 += a * b2;
- c3 += a1* b1;
- c4 += a1* b2;
- c5 += a2* b1;
- c6 += a2* b2;
- c7 += a3* b1;
- c8 += a3* b2;
- }
- }
- C[i + j*lda] = c1; // write the finished tile back to C
- C[i + j*lda + 1] = c2;
- C[i + (j+1)*lda] = c3;
- C[i + (j+1)*lda + 1] = c4;
- C[i + (j+2)*lda] = c5;
- C[i + (j+2)*lda + 1] = c6;
- C[i + (j+3)*lda] = c7;
- C[i + (j+3)*lda + 1] = c8;
- }
- }
- }
- }
-}
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-// Per-core entry point: records the core id and core count, then times and
-// verifies matmul. The naive baseline run is not executed in this variant.
-// Never returns; it ends with exit(0).
-void thread_entry(int cid, int nc)
-{
-   // static storage: a single buffer shared by every core running this
-   // function, zero-initialised before the accumulating matmul runs
-   static data_t results_data[ARRAY_SIZE];
-
-   coreid = cid;
-   ncores = nc;
-
-   // Timed run of the blocked matmul, including its closing barrier
-   barrier(nc);
-   stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
-   printArrayMT("results:", ARRAY_SIZE, results_data);
-   printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
-   // Check the result against the reference data
-   verifyMT(ARRAY_SIZE, results_data, verify_data);
-   barrier(nc);
-
-   exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-// Two-level stringification: the argument is macro-expanded before it is quoted.
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-
-// Runs `code` between two samples of rdcycle() and rdinstret(). Core 0 then
-// prints the quoted code, the cycle delta, cycles per iteration (the delta
-// divided by DATA_SIZE) and cycles per instruction, the last two with one
-// decimal digit each.
-#define stats(code) do { \
-    unsigned long _c = -rdcycle(); \
-    unsigned long _i = -rdinstret(); \
-    code; \
-    _c += rdcycle(); \
-    _i += rdinstret(); \
-    if (coreid == 0) { \
-      printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
-             stringify(code), \
-             _c, \
-             _c/DATA_SIZE, \
-             10*_c/DATA_SIZE%10, \
-             _c/_i, \
-             10*_c/_i%10); \
-    } \
-  } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-// Prints `name` followed by the first n elements of arr (each converted to
-// long) on a single line. Only core 0 prints; other cores return at once.
-void printArrayMT( char name[], int n, data_t arr[] )
-{
-   if (coreid == 0)
-   {
-      int idx = 0;
-
-      printf( " %10s :", name );
-      while ( idx < n )
-      {
-         printf( " %4ld ", (long) arr[idx] );
-         idx++;
-      }
-      printf( "\n" );
-   }
-}
-
-// Compares the first n elements of test against correct. Only core 0 checks;
-// every other core returns immediately. On the first mismatch the index and
-// both values (converted to long) are printed and the program exits with
-// status -1.
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
-   if (coreid != 0)
-      return;
-
-   for (size_t i = 0; i < n; i++)
-   {
-      if (test[i] != correct[i])
-      {
-         // i is a size_t; convert it explicitly so the argument type matches
-         // the conversion specifier (a size_t passed to "%d" is a mismatch).
-         printf("FAILED test[%ld]= %4ld, correct[%ld]= %4ld\n",
-                (long)i, (long)test[i], (long)i, (long)correct[i]);
-         exit(-1);
-      }
-   }
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-// In-place vector add, x[i] += y[i]. Work is interleaved across cores: this
-// core handles indices coreid, coreid + ncores, coreid + 2*ncores, ... below n.
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
-   size_t idx = coreid;
-
-   while (idx < n)
-   {
-      x[idx] += y[idx];
-      idx += ncores;
-   }
-}
-
-// In-place vector add, x[i] += y[i], with the index range split into one
-// contiguous chunk of n / ncores elements per core. The core with the highest
-// id also processes the n % ncores elements left over by the integer
-// division, which would otherwise never be added.
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
-   const size_t chunk = n / ncores;
-   const size_t start = coreid * chunk;
-   size_t end = start + chunk;
-
-   // Last core: extend the chunk to the end of the vector to cover the remainder.
-   if (coreid + 1 == ncores)
-      end = n;
-   // Never run past the vector, whatever coreid/ncores happen to be.
-   if (end > n)
-      end = n;
-
-   for (size_t i = start; i < end; i++)
-      x[i] = x[i] + y[i];
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-// Per-core entry point: records the core id and core count, then times and
-// verifies vvadd followed by vvadd_opt, restoring the working buffer from
-// input1_data before each run. Never returns; it ends with exit(0).
-void thread_entry(int cid, int nc)
-{
-   // static storage: a single buffer shared by every core running this function
-   static data_t results_data[DATA_SIZE];
-
-   coreid = cid;
-   ncores = nc;
-
-   // The add is performed in place and run twice, so the kernels work on a
-   // copy of the first input; core 0 makes that copy.
-   if (coreid == 0)
-   {
-      for (size_t idx = 0; idx < DATA_SIZE; idx++)
-         results_data[idx] = input1_data[idx];
-   }
-
-   // First run: the interleaved vvadd, timed together with its closing barrier
-   barrier(nc);
-   stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-   verifyMT(DATA_SIZE, results_data, verify_data);
-
-   // Restore the working copy before the second run
-   if (coreid == 0)
-   {
-      for (size_t idx = 0; idx < DATA_SIZE; idx++)
-         results_data[idx] = input1_data[idx];
-   }
-   barrier(nc);
-
-   // Second run: the chunked vvadd_opt
-   barrier(nc);
-   stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
-   printArrayMT("results: ", DATA_SIZE, results_data);
-   printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
-   verifyMT(DATA_SIZE, results_data, verify_data);
-   barrier(nc);
-
-   exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+//--------------------------------------------------------------------------
+// matmul_col_block
+//
+// Accumulates a 16-column-wide slice of the product A*B into C. All three
+// matrices are indexed as X[row*lda + col]. For every row j in [0, 32) and
+// every column c in [col0, col0 + 16):
+//
+//   C[j*lda + c] += sum over k in [0, 32) of A[j*lda + k] * B[k*lda + c]
+//
+// The 16 running sums are kept in scalar locals for the whole k loop and
+// written back to C once per row, so C is read and written only once per
+// element instead of once per multiply-add.
+//
+//   lda  : row stride (in elements) of A, B and C
+//   col0 : first column of the slice handled by this call
+//
+// NOTE(review): the row/depth extent is fixed at 32 regardless of lda, so
+// this presumably expects lda == 32 -- confirm against the caller.
+static void matmul_col_block(const int lda, const int col0, const data_t A[], const data_t B[], data_t C[])
+{
+  enum { MATMUL_DIM = 32 };
+  int j, k;
+
+  for (j = 0; j < MATMUL_DIM; j++) {
+    const data_t *a_row = A + j*lda;
+    data_t *c_row = C + j*lda + col0;
+
+    // Start from the current contents of C so the product is accumulated.
+    data_t t0  = c_row[0],  t1  = c_row[1],  t2  = c_row[2],  t3  = c_row[3];
+    data_t t4  = c_row[4],  t5  = c_row[5],  t6  = c_row[6],  t7  = c_row[7];
+    data_t t8  = c_row[8],  t9  = c_row[9],  t10 = c_row[10], t11 = c_row[11];
+    data_t t12 = c_row[12], t13 = c_row[13], t14 = c_row[14], t15 = c_row[15];
+
+    for (k = 0; k < MATMUL_DIM; k++) {
+      const data_t a = a_row[k];
+      const data_t *b_row = B + k*lda + col0;
+
+      t0  += a * b_row[0];
+      t1  += a * b_row[1];
+      t2  += a * b_row[2];
+      t3  += a * b_row[3];
+      t4  += a * b_row[4];
+      t5  += a * b_row[5];
+      t6  += a * b_row[6];
+      t7  += a * b_row[7];
+      t8  += a * b_row[8];
+      t9  += a * b_row[9];
+      t10 += a * b_row[10];
+      t11 += a * b_row[11];
+      t12 += a * b_row[12];
+      t13 += a * b_row[13];
+      t14 += a * b_row[14];
+      t15 += a * b_row[15];
+    }
+
+    c_row[0]  = t0;
+    c_row[1]  = t1;
+    c_row[2]  = t2;
+    c_row[3]  = t3;
+    c_row[4]  = t4;
+    c_row[5]  = t5;
+    c_row[6]  = t6;
+    c_row[7]  = t7;
+    c_row[8]  = t8;
+    c_row[9]  = t9;
+    c_row[10] = t10;
+    c_row[11] = t11;
+    c_row[12] = t12;
+    c_row[13] = t13;
+    c_row[14] = t14;
+    c_row[15] = t15;
+  }
+}
+
+//--------------------------------------------------------------------------
+// matmul
+//
+// Two-way column-partitioned matrix multiply: C += A * B.
+//
+//   coreid : index of the calling core
+//   ncores : number of cores taking part
+//   lda    : row stride (in elements) of A, B and C
+//   A, B   : input matrices, indexed as X[row*lda + col]
+//   C      : result matrix; the product is added to its current contents
+//
+// Work split:
+//   - core 0 computes columns 0..15
+//   - core 1 computes columns 16..31
+//   - when ncores == 1 the single caller also computes columns 16..31
+//   - cores with coreid > 1 return immediately without touching C
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+  enum { COLS_PER_CORE = 16 };
+
+  if (coreid > 1)
+    return;
+
+  if (coreid == 0)
+    matmul_col_block(lda, 0, A, B, C);
+
+  if (coreid == 1 || ncores == 1)
+    matmul_col_block(lda, COLS_PER_CORE, A, B, C);
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k, x;
- data_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
- data_t temp8, temp9, temp10, temp11, temp12, temp13, temp14, temp15;
-
- //complete Q1
- if(coreid == 0) {
- for(j = 0; j < 32; j++) {
- temp0 = C[j*lda];
- temp1 = C[1 + j*lda];
- temp2 = C[2 + j*lda];
- temp3 = C[3 + j*lda];
- temp4 = C[4 + j*lda];
- temp5 = C[5 + j*lda];
- temp6 = C[6 + j*lda];
- temp7 = C[7 + j*lda];
- temp8 = C[8 + j*lda];
- temp9 = C[9 + j*lda];
- temp10 = C[10 + j*lda];
- temp11 = C[11 + j*lda];
- temp12 = C[12 + j*lda];
- temp13 = C[13 + j*lda];
- temp14 = C[14 + j*lda];
- temp15 = C[15 + j*lda];
- for(k = 0; k < 32; k++) {
- temp0 += A[j*lda + k] * B[k*lda];
- temp1 += A[j*lda + k] * B[1+k*lda];
- temp2 += A[j*lda + k] * B[2+k*lda];
- temp3 += A[j*lda + k] * B[3+k*lda];
- temp4 += A[j*lda + k] * B[4+k*lda];
- temp5 += A[j*lda + k] * B[5+k*lda];
- temp6 += A[j*lda + k] * B[6+k*lda];
- temp7 += A[j*lda + k] * B[7+k*lda];
- temp8 += A[j*lda + k] * B[8+k*lda];
- temp9 += A[j*lda + k] * B[9+k*lda];
- temp10 += A[j*lda + k] * B[10+k*lda];
- temp11 += A[j*lda + k] * B[11+k*lda];
- temp12 += A[j*lda + k] * B[12+k*lda];
- temp13 += A[j*lda + k] * B[13+k*lda];
- temp14 += A[j*lda + k] * B[14+k*lda];
- temp15 += A[j*lda + k] * B[15+k*lda];
- }
- C[j*lda] = temp0;
- C[1 + j*lda] = temp1;
- C[2 + j*lda] = temp2;
- C[3 + j*lda] = temp3;
- C[4 + j*lda] = temp4;
- C[5 + j*lda] = temp5;
- C[6 + j*lda] = temp6;
- C[7 + j*lda] = temp7;
- C[8 + j*lda] = temp8;
- C[9 + j*lda] = temp9;
- C[10 + j*lda] = temp10;
- C[11 + j*lda] = temp11;
- C[12 + j*lda] = temp12;
- C[13 + j*lda] = temp13;
- C[14 + j*lda] = temp14;
- C[15 + j*lda] = temp15;
- }
- }
-
- else {
- for(j = 0; j < 32; j++) {
- temp0 = C[16 + j*lda];
- temp1 = C[17 + j*lda];
- temp2 = C[18 + j*lda];
- temp3 = C[19 + j*lda];
- temp4 = C[20 + j*lda];
- temp5 = C[21 + j*lda];
- temp6 = C[22 + j*lda];
- temp7 = C[23 + j*lda];
- temp8 = C[24 + j*lda];
- temp9 = C[25 + j*lda];
- temp10 = C[26 + j*lda];
- temp11 = C[27 + j*lda];
- temp12 = C[28 + j*lda];
- temp13 = C[29 + j*lda];
- temp14 = C[30 + j*lda];
- temp15 = C[31 + j*lda];
- for(k = 0; k < 32; k++) {
- temp0 += A[j*lda + k] * B[16 + k*lda];
- temp1 += A[j*lda + k] * B[17 + k*lda];
- temp2 += A[j*lda + k] * B[18 + k*lda];
- temp3 += A[j*lda + k] * B[19 + k*lda];
- temp4 += A[j*lda + k] * B[20 + k*lda];
- temp5 += A[j*lda + k] * B[21 + k*lda];
- temp6 += A[j*lda + k] * B[22 + k*lda];
- temp7 += A[j*lda + k] * B[23 + k*lda];
- temp8 += A[j*lda + k] * B[24 + k*lda];
- temp9 += A[j*lda + k] * B[25 + k*lda];
- temp10 += A[j*lda + k] * B[26 + k*lda];
- temp11 += A[j*lda + k] * B[27 + k*lda];
- temp12 += A[j*lda + k] * B[28 + k*lda];
- temp13 += A[j*lda + k] * B[29 + k*lda];
- temp14 += A[j*lda + k] * B[30 + k*lda];
- temp15 += A[j*lda + k] * B[31 + k*lda];
- }
- C[16 + j*lda] = temp0;
- C[17 + j*lda] = temp1;
- C[18 + j*lda] = temp2;
- C[19 + j*lda] = temp3;
- C[20 + j*lda] = temp4;
- C[21 + j*lda] = temp5;
- C[22 + j*lda] = temp6;
- C[23 + j*lda] = temp7;
- C[24 + j*lda] = temp8;
- C[25 + j*lda] = temp9;
- C[26 + j*lda] = temp10;
- C[27 + j*lda] = temp11;
- C[28 + j*lda] = temp12;
- C[29 + j*lda] = temp13;
- C[30 + j*lda] = temp14;
- C[31 + j*lda] = temp15;
- }
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- // feel free to make a separate function for MI and MSI versions.
- int i, j, k, x;
- data_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
- data_t temp8, temp9, temp10, temp11, temp12, temp13, temp14, temp15;
-
-
- if(coreid == 0) {
- for(j = 0; j < 32; j++) {
- temp0 = C[j*lda];
- temp1 = C[1 + j*lda];
- temp2 = C[2 + j*lda];
- temp3 = C[3 + j*lda];
- temp4 = C[4 + j*lda];
- temp5 = C[5 + j*lda];
- temp6 = C[6 + j*lda];
- temp7 = C[7 + j*lda];
- temp8 = C[8 + j*lda];
- temp9 = C[9 + j*lda];
- temp10 = C[10 + j*lda];
- temp11 = C[11 + j*lda];
- temp12 = C[12 + j*lda];
- temp13 = C[13 + j*lda];
- temp14 = C[14 + j*lda];
- temp15 = C[15 + j*lda];
- for(k = 0; k < 32; k++) {
- temp0 += A[j*lda + k] * B[k*lda];
- temp1 += A[j*lda + k] * B[1 + k*lda];
- temp2 += A[j*lda + k] * B[2 + k*lda];
- temp3 += A[j*lda + k] * B[3 + k*lda];
- temp4 += A[j*lda + k] * B[4 + k*lda];
- temp5 += A[j*lda + k] * B[5 + k*lda];
- temp6 += A[j*lda + k] * B[6 + k*lda];
- temp7 += A[j*lda + k] * B[7 + k*lda];
- temp8 += A[j*lda + k] * B[8 + k*lda];
- temp9 += A[j*lda + k] * B[9 + k*lda];
- temp10 += A[j*lda + k] * B[10 + k*lda];
- temp11 += A[j*lda + k] * B[11 + k*lda];
- temp12 += A[j*lda + k] * B[12 + k*lda];
- temp13 += A[j*lda + k] * B[13 + k*lda];
- temp14 += A[j*lda + k] * B[14 + k*lda];
- temp15 += A[j*lda + k] * B[15 + k*lda];
- }
- C[j*lda] = temp0;
- C[1 + j*lda] = temp1;
- C[2 + j*lda] = temp2;
- C[3 + j*lda] = temp3;
- C[4 + j*lda] = temp4;
- C[5 + j*lda] = temp5;
- C[6 + j*lda] = temp6;
- C[7 + j*lda] = temp7;
- C[8 + j*lda] = temp8;
- C[9 + j*lda] = temp9;
- C[10 + j*lda] = temp10;
- C[11 + j*lda] = temp11;
- C[12 + j*lda] = temp12;
- C[13 + j*lda] = temp13;
- C[14 + j*lda] = temp14;
- C[15 + j*lda] = temp15;
- }
- }
-
- else {
- for(j = 16; j < 32; j++) {
- temp0 = C[16 + j*lda];
- temp1 = C[17 + j*lda];
- temp2 = C[18 + j*lda];
- temp3 = C[19 + j*lda];
- temp4 = C[20 + j*lda];
- temp5 = C[21 + j*lda];
- temp6 = C[22 + j*lda];
- temp7 = C[23 + j*lda];
- temp8 = C[24 + j*lda];
- temp9 = C[25 + j*lda];
- temp10 = C[26 + j*lda];
- temp11 = C[27 + j*lda];
- temp12 = C[28 + j*lda];
- temp13 = C[29 + j*lda];
- temp14 = C[30 + j*lda];
- temp15 = C[31 + j*lda];
- for(k = 0; k < 32; k++) {
- temp0 += A[j*lda + k] * B[16 + k*lda];
- temp1 += A[j*lda + k] * B[17 + k*lda];
- temp2 += A[j*lda + k] * B[18 + k*lda];
- temp3 += A[j*lda + k] * B[19 + k*lda];
- temp4 += A[j*lda + k] * B[20 + k*lda];
- temp5 += A[j*lda + k] * B[21 + k*lda];
- temp6 += A[j*lda + k] * B[22 + k*lda];
- temp7 += A[j*lda + k] * B[23 + k*lda];
- temp8 += A[j*lda + k] * B[24 + k*lda];
- temp9 += A[j*lda + k] * B[25 + k*lda];
- temp10 += A[j*lda + k] * B[26 + k*lda];
- temp11 += A[j*lda + k] * B[27 + k*lda];
- temp12 += A[j*lda + k] * B[28 + k*lda];
- temp13 += A[j*lda + k] * B[29 + k*lda];
- temp14 += A[j*lda + k] * B[30 + k*lda];
- temp15 += A[j*lda + k] * B[31 + k*lda];
- }
- C[16 + j*lda] = temp0;
- C[17 + j*lda] = temp1;
- C[18 + j*lda] = temp2;
- C[19 + j*lda] = temp3;
- C[20 + j*lda] = temp4;
- C[21 + j*lda] = temp5;
- C[22 + j*lda] = temp6;
- C[23 + j*lda] = temp7;
- C[24 + j*lda] = temp8;
- C[25 + j*lda] = temp9;
- C[26 + j*lda] = temp10;
- C[27 + j*lda] = temp11;
- C[28 + j*lda] = temp12;
- C[29 + j*lda] = temp13;
- C[30 + j*lda] = temp14;
- C[31 + j*lda] = temp15;
- }
- for(j = 0; j <16; j++) {
- temp0 = C[16 + j*lda];
- temp1 = C[17 + j*lda];
- temp2 = C[18 + j*lda];
- temp3 = C[19 + j*lda];
- temp4 = C[20 + j*lda];
- temp5 = C[21 + j*lda];
- temp6 = C[22 + j*lda];
- temp7 = C[23 + j*lda];
- temp8 = C[24 + j*lda];
- temp9 = C[25 + j*lda];
- temp10 = C[26 + j*lda];
- temp11 = C[27 + j*lda];
- temp12 = C[28 + j*lda];
- temp13 = C[29 + j*lda];
- temp14 = C[30 + j*lda];
- temp15 = C[31 + j*lda];
- for(k = 0; k < 32; k++) {
- temp0 += A[j*lda + k] * B[16 + k*lda];
- temp1 += A[j*lda + k] * B[17 + k*lda];
- temp2 += A[j*lda + k] * B[18 + k*lda];
- temp3 += A[j*lda + k] * B[19 + k*lda];
- temp4 += A[j*lda + k] * B[20 + k*lda];
- temp5 += A[j*lda + k] * B[21 + k*lda];
- temp6 += A[j*lda + k] * B[22 + k*lda];
- temp7 += A[j*lda + k] * B[23 + k*lda];
- temp8 += A[j*lda + k] * B[24 + k*lda];
- temp9 += A[j*lda + k] * B[25 + k*lda];
- temp10 += A[j*lda + k] * B[26 + k*lda];
- temp11 += A[j*lda + k] * B[27 + k*lda];
- temp12 += A[j*lda + k] * B[28 + k*lda];
- temp13 += A[j*lda + k] * B[29 + k*lda];
- temp14 += A[j*lda + k] * B[30 + k*lda];
- temp15 += A[j*lda + k] * B[31 + k*lda];
- }
- C[16 + j*lda] = temp0;
- C[17 + j*lda] = temp1;
- C[18 + j*lda] = temp2;
- C[19 + j*lda] = temp3;
- C[20 + j*lda] = temp4;
- C[21 + j*lda] = temp5;
- C[22 + j*lda] = temp6;
- C[23 + j*lda] = temp7;
- C[24 + j*lda] = temp8;
- C[25 + j*lda] = temp9;
- C[26 + j*lda] = temp10;
- C[27 + j*lda] = temp11;
- C[28 + j*lda] = temp12;
- C[29 + j*lda] = temp13;
- C[30 + j*lda] = temp14;
- C[31 + j*lda] = temp15;
- }
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- size_t start, end, i;
- start = (coreid == 0) ? 0 : n/2;
- end = (coreid == 0) ? n/2 : n;
-
- for (i = start; i < end; i++)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+/*
+ * matmul: multi-threaded 32x32 matrix multiply, C = A * B (row-major).
+ *
+ * The kernel is specialised for 32x32 operands: the row stride, the width
+ * of the inner loop and the accumulator buffers are all fixed at 32, and
+ * the loop over A's columns is unrolled by four. Callers are therefore
+ * expected to pass lda == 32.
+ *
+ *   coreid  index of the calling core, 0 <= coreid < ncores
+ *   ncores  number of cores sharing the work (must be non-zero)
+ *   lda     matrix dimension; bounds the loop over A's columns / B's rows
+ *   A, B    input matrices
+ *   C       output matrix; every row owned by this core is overwritten
+ *
+ * Rows [32*coreid/ncores, 32*(coreid+1)/ncores) belong to this core. They
+ * are processed two at a time so that each loaded element of B feeds two
+ * output rows.
+ */
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+  enum { DIM = 32 };
+  size_t i, j, k, l;
+  const size_t row_begin = coreid*DIM/ncores;
+  const size_t row_end = DIM*(1+coreid)/ncores;
+  int row, row2, column, column2, column3, column4;
+  data_t element, element2, element3, element4, element5, element6, element7, element8;
+  /* Per-row accumulators; flushed to C and cleared after every row pair. */
+  data_t temp_mat[DIM] = {0};
+  data_t temp_mat2[DIM] = {0};
+
+  for (l = row_begin; l < row_end; l += 2) {
+    row = l*DIM;
+    /* If this core's slice holds an odd number of rows, the last row has no
+     * partner. Pair it with itself instead of with row l+1, which is owned
+     * by the next core or lies one row past the end of the matrices. */
+    row2 = (l + 1 < row_end) ? (l+1)*DIM : row;
+    for (i = 0; i < (size_t)lda; i += 4) {
+      /* Four consecutive elements from each of the two rows of A ... */
+      element = A[row+i];
+      element2 = A[row+i+1];
+      element3 = A[row+i+2];
+      element4 = A[row+i+3];
+      element5 = A[row2+i];
+      element6 = A[row2+i+1];
+      element7 = A[row2+i+2];
+      element8 = A[row2+i+3];
+      /* ... and the offsets of the four matching rows of B. */
+      column = i*DIM;
+      column2 = (i+1)*DIM;
+      column3 = (i+2)*DIM;
+      column4 = (i+3)*DIM;
+      for (j = 0; j < DIM; j += 4) {
+        temp_mat[j]+=element*B[column+j]+element2*B[column2+j]+element3*B[column3+j]+element4*B[column4+j];
+        temp_mat[j+1]+=element*B[column+j+1]+element2*B[column2+j+1]+element3*B[column3+j+1]+element4*B[column4+j+1];
+        temp_mat[j+2]+=element*B[column+j+2]+element2*B[column2+j+2]+element3*B[column3+j+2]+element4*B[column4+j+2];
+        temp_mat[j+3]+=element*B[column+j+3]+element2*B[column2+j+3]+element3*B[column3+j+3]+element4*B[column4+j+3];
+        temp_mat2[j]+=element5*B[column+j]+element6*B[column2+j]+element7*B[column3+j]+element8*B[column4+j];
+        temp_mat2[j+1]+=element5*B[column+j+1]+element6*B[column2+j+1]+element7*B[column3+j+1]+element8*B[column4+j+1];
+        temp_mat2[j+2]+=element5*B[column+j+2]+element6*B[column2+j+2]+element7*B[column3+j+2]+element8*B[column4+j+2];
+        temp_mat2[j+3]+=element5*B[column+j+3]+element6*B[column2+j+3]+element7*B[column3+j+3]+element8*B[column4+j+3];
+      }
+    }
+    /* Store the finished rows and reset the accumulators for the next pair. */
+    for (k = 0; k < DIM; k++) {
+      C[row+k] = temp_mat[k];
+      C[row2+k] = temp_mat2[k];
+      temp_mat[k] = 0;
+      temp_mat2[k] = 0;
+    }
+  }
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- size_t i, j, k, l;
- int row,row2, column, column2, column3, column4, column5, column6, column7, column8;
- size_t max_dim = 32*32;
- data_t element, element2, element3, element4, element5, element6, element7, element8;
- data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){
- for (l=coreid*32/ncores; l<32*(1+coreid)/ncores; l+=2){
- row=l*32;
- row2=(l+1)*32;
- for (i=0; i<lda; i+=4){
- element = A[row+i];
- element2 = A[row+i+1];
- element3 = A[row+i+2];
- element4 = A[row+i+3];
- element5 = A[row2+i];
- element6 = A[row2+i+1];
- element7 = A[row2+i+2];
- element8 = A[row2+i+3];
- column=i*32;
- column2=(i+1)*32;
- column3=(i+2)*32;
- column4=(i+3)*32;
- for (j=0; j<32; j+=4){
- temp_mat[j]+=element*B[column+j]+element2*B[column2+j]+element3*B[column3+j]+element4*B[column4+j];
- temp_mat[j+1]+=element*B[column+j+1]+element2*B[column2+j+1]+element3*B[column3+j+1]+element4*B[column4+j+1];
- temp_mat[j+2]+=element*B[column+j+2]+element2*B[column2+j+2]+element3*B[column3+j+2]+element4*B[column4+j+2];
- temp_mat[j+3]+=element*B[column+j+3]+element2*B[column2+j+3]+element3*B[column3+j+3]+element4*B[column4+j+3];
- temp_mat2[j]+=element5*B[column+j]+element6*B[column2+j]+element7*B[column3+j]+element8*B[column4+j];
- temp_mat2[j+1]+=element5*B[column+j+1]+element6*B[column2+j+1]+element7*B[column3+j+1]+element8*B[column4+j+1];
- temp_mat2[j+2]+=element5*B[column+j+2]+element6*B[column2+j+2]+element7*B[column3+j+2]+element8*B[column4+j+2];
- temp_mat2[j+3]+=element5*B[column+j+3]+element6*B[column2+j+3]+element7*B[column3+j+3]+element8*B[column4+j+3];
- }
- /*if (i==28){
- for(k=0; k<32; k++){
- C[row+k]=temp_mat[k];
- C[row2+k]=temp_mat2[k];
- temp_mat[k]=0;
- temp_mat2[k]=0;
- }
- }*/
- }
- for(k=0; k<32; k++){
- C[row+k]=temp_mat[k];
- C[row2+k]=temp_mat2[k];
- temp_mat[k]=0;
- temp_mat2[k]=0;
- }
- }
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-/*size_t i;
- size_t j;
- size_t max_dim = lda*lda;
- if (coreid==0){
- for (i=0; i<max_dim/(ncores*2); i+=8){
- data_t elementA1 = A[i];
- data_t elementA12 = A[i+1];
- data_t elementA13 = A[i+2];
- data_t elementA14 = A[i+3];
- data_t elementA15 = A[i+4];
- data_t elementA16 = A[i+5];
- data_t elementA17 = A[i+6];
- data_t elementA18 = A[i+7];
- data_t elementA2 = A[i+32*8];
- data_t elementA21 = A[i+32*8+1];
- data_t elementA22 = A[i+32*8+2];
- data_t elementA23 = A[i+32*8+3];
- data_t elementA24 = A[i+32*8+4];
- data_t elementA25 = A[i+32*8+5];
- data_t elementA26 = A[i+32*8+6];
- data_t elementA27 = A[i+32*8+7];
- int row= (int)(i/32)*32;
- int row2 = row+8*32;
- int column1 = i%32*32;
- int column12 = (i+1)%32*32;
- int column13 = (i+2)%32*32;
- int column14 = (i+3)%32*32;
- int column15 = (i+4)%32*32;
- int column16 = (i+5)%32*32;
- int column17 = (i+6)%32*32;
- int column18 = (i+7)%32*32;
-
- for (j=0; j<lda; j++){
- C[row+j]+=elementA1*B[column1+j]+elementA12*B[column12+j]+elementA13*B[column13+j]+elementA14*B[column14+j]+elementA15*B[column15+j]+elementA16*B[column16+j]+elementA17*B[column17+j]+elementA18*B[column18+j]
-
- C[row2+j]+=elementA2*B[column1+j]+elementA21*B[column12+j]+elementA22*B[column13+j]+elementA23*B[column14+j]+elementA24*B[column15+j]+elementA25*B[column16+j]+elementA26*B[column17+j]+elementA27*B[column18+j];
- }
- }}else{
- for (i=max_dim/2; i<(max_dim/(ncores*2)+max_dim/2); i+=8){
- data_t elementA1 = A[i];
- data_t elementA12 = A[i+1];
- data_t elementA13 = A[i+2];
- data_t elementA14 = A[i+3];
- data_t elementA15 = A[i+4];
- data_t elementA16 = A[i+5];
- data_t elementA17 = A[i+6];
- data_t elementA18 = A[i+7];
- data_t elementA2 = A[i+32*8];
- data_t elementA21 = A[i+32*8+1];
- data_t elementA22 = A[i+32*8+2];
- data_t elementA23 = A[i+32*8+3];
- data_t elementA24 = A[i+32*8+4];
- data_t elementA25 = A[i+32*8+5];
- data_t elementA26 = A[i+32*8+6];
- data_t elementA27 = A[i+32*8+7];
- int row= (int)(i/32)*32;
- int row2 = row+8*32;
- int column1 = i%32*32;
- int column12 = (i+1)%32*32;
- int column13 = (i+2)%32*32;
- int column14 = (i+3)%32*32;
- int column15 = (i+4)%32*32;
- int column16 = (i+5)%32*32;
- int column17 = (i+6)%32*32;
- int column18 = (i+7)%32*32;
-
- for (j=0; j<lda; j++){
- C[row+j]+=elementA1*B[column1+j]+elementA12*B[column12+j]+elementA13*B[column13+j]+elementA14*B[column14+j]+elementA15*B[column15+j]+elementA16*B[column16+j]+elementA17*B[column17+j]+elementA18*B[column18+j];
- C[row2+j]+=elementA2*B[column1+j]+elementA21*B[column12+j]+elementA22*B[column13+j]+elementA23*B[column14+j]+elementA24*B[column15+j]+elementA25*B[column16+j]+elementA26*B[column17+j]+elementA27*B[column18+j];
-
- }
- }
- }*/
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- size_t i;
- size_t j;
- size_t k;
- size_t max_dim = 32*32;
- data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){
- for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores)/2; i+=8){
- data_t element=A[i];
- data_t element2 = A[i+1];
- data_t element3 = A[i+2];
- data_t element4 = A[i+3];
- data_t element5 = A[i+4];
- data_t element6 = A[i+5];
- data_t element7 = A[i+6];
- data_t element8 = A[i+7];
- data_t elementA2 = A[i+32*8];
- data_t elementA21 = A[i+32*8+1];
- data_t elementA22 = A[i+32*8+2];
- data_t elementA23 = A[i+32*8+3];
- data_t elementA24 = A[i+32*8+4];
- data_t elementA25 = A[i+32*8+5];
- data_t elementA26 = A[i+32*8+6];
- data_t elementA27 = A[i+32*8+7];
- int row= (int)(i/32)*32;
- int row2 = row+8*32;
- int column = i%32*32;
- int column2 = (i+1)%32*32;
- int column3 = (i+2)%32*32;
- int column4 = (i+3)%32*32;
- int column5 = (i+4)%32*32;
- int column6 = (i+5)%32*32;
- int column7 = (i+6)%32*32;
- int column8 = (i+7)%32*32;
-
- for (j=0; j<32; j++){
- temp_mat[j]+=element*B[column+j]+element2*B[column2+j]+element3*B[column3+j]+element4*B[column4+j]+element5*B[column5+j]+element6*B[column6+j]+element7*B[column7+j]+element8*B[column8+j];
-
- temp_mat2[j]+=elementA2*B[column+j]+elementA21*B[column2+j]+elementA22*B[column3+j]+elementA23*B[column4+j]+elementA24*B[column5+j]+elementA25*B[column6+j]+elementA26*B[column7+j]+elementA27*B[column8+j];
- }
- if (i%32==24){
- for(k=0; k<32; k++){
- C[row+k]=temp_mat[k];
- C[row2+k]=temp_mat2[k];
- temp_mat[k]=0;
- temp_mat2[k]=0;
-
- }
- }
- }
-
-
-
-
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- /*size_t i;
- size_t j;
- size_t k;
- size_t max_dim = 32*32;
- data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){
- data_t element=A[i];
- data_t element2 = A[i+1];
- data_t element3 = A[i+2];
- data_t element4 = A[i+3];
- data_t element5 = A[i+4];
- data_t element6 = A[i+5];
- data_t element7 = A[i+6];
- data_t element8 = A[i+7];
- int row= (int)(i/32)*32;
- int column = i%32*32;
- int column2 = (i+1)%32*32;
- int column3 = (i+2)%32*32;
- int column4 = (i+3)%32*32;
- int column5 = (i+4)%32*32;
- int column6 = (i+5)%32*32;
- int column7 = (i+6)%32*32;
- int column8 = (i+7)%32*32;
-
- for (j=0; j<32; j++){
- temp_mat[j]+=element*B[column+j]+element2*B[column2+j]+element3*B[column3+j]+element4*B[column4+j]+element5*B[column5+j]+element6*B[column6+j]+element7*B[column7+j]+element8*B[column8+j];
- }
- if (i%32==24){
- for(k=0; k<32; k++){
- C[row+k]=temp_mat[k];
- temp_mat[k]=0;
- }
- }
- }*/
- int i,j,k,l;
- //data_t element11, element12, element13, element14, element21, element22, element23, element24;
- data_t element1, element2, element3, element4, element5, element6, element7, element8;
- int row, row2;
- //int column11, column12, column13, column14, column21, column22, column23, column24;
- int column1, column2, column3, column4, column5, column6, column7, column8;
- data_t temp[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- //data_t temp2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- if (coreid == 0){
- for (i=0; i<32; i++){
- if (i==15){
- for (j=0; j<32; j+=4){
- row=15*32;
- element1 = A[row+j];
- element2 = A[row+j+1];
- element3 = A[row+j+2];
- element4 = A[row+j+3];
- column1 = j*32;
- column2 = (j+1)*32;
- column3 = (j+2)*32;
- column4 = (j+3)*32;
- for (k=0;k<32; k++){
- temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k];
- }
- if (j==28){
- for (l=0; l<32; l++){
- C[row+l]=temp[l];
- temp[l]=0;
- }
- }
- }
- }
- else{
- row = i*32;
- for (j=0; j<16; j+=4){
- element1 = A[i*32+j];
- element2 = A[i*32+j+1];
- element3 = A[i*32+j+2];
- element4 = A[i*32+j+3];
- column1 = j*32;
- column2 = (j+1)*32;
- column3 = (j+2)*32;
- column4 = (j+3)*32;
- for (k=0; k<32; k++){
- temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k];
- }
- if (j==12){
- for (l=0; l<32; l++){
- C[row+l]+=temp[l];
- temp[l]=0;
- }
- }
- }
- }
- }
- }
- else if (coreid==1){
- for (i=0; i<32; i++){
- row = (31-i)*32;
- if (row/32 != 15){
- for (j=16; j<32; j+=4){
- element1 = A[(31-i)*32+j];
- element2 = A[(31-i)*32+j+1];
- element3 = A[(31-i)*32+j+2];
- element4 = A[(31-i)*32+j+3];
- column1 = j*32;
- column2 = (j+1)*32;
- column3 = (j+2)*32;
- column4 = (j+3)*32;
- for (k=0; k<32; k++){
- temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k];
- }
- if (j==28){
- for (l=0; l<32; l++){
- C[row+l]+=temp[l];
- temp[l]=0;
- }
- }
- }
- }
- }
- }
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i,j,k,l;
- data_t element1, element2, element3, element4, element5, element6, element7, element8;
- int row, row2;
- int column1, column2, column3, column4, column5, column6, column7, column8;
- data_t temp[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- data_t temp2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- if (coreid == 0){
- for (i=0; i<lda; i+=2){
- row = i*lda;
- row2 = (i+1)*lda;
- for (j=0; j<16; j+=4){
- element1 = A[row+j];
- element2 = A[row+j+1];
- element3 = A[row+j+2];
- element4 = A[row+j+3];
- column1 = j*32;
- column2 = (j+1)*32;
- column3 = (j+2)*32;
- column4 = (j+3)*32;
- element5 = A[row2+j];
- element6 = A[row2+j+1];
- element7 = A[row2+j+2];
- element8 = A[row2+j+3];
-
- for (k=0; k<32; k+=4){
- temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k];
- temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1];
- temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2];
- temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3];
- temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k];
- temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1];
- temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2];
- temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3];
- }
- if (j==12){
- for (l=0; l<32; l++){
- C[row+l]+=temp[l];
- C[row2+l]+=temp2[l];
- temp[l]=0;
- temp2[l]=0;
- }
- }
- }
- }
- }
- else if (coreid==1){
- for (i=0; i<32; i+=2){
- row = (31-i)*lda;
- row2 = (31-i-1)*lda;
- for (j=16; j<32; j+=4){
- element1 = A[row+j];
- element2 = A[row+j+1];
- element3 = A[row+j+2];
- element4 = A[row+j+3];
- element5 = A[row2+j];
- element6 = A[row2+j+1];
- element7 = A[row2+j+2];
- element8 = A[row2+j+3];
- column1 = j*32;
- column2 = (j+1)*32;
- column3 = (j+2)*32;
- column4 = (j+3)*32;
- for (k=0; k<32; k+=4){
- temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k];
- temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1];
- temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2];
- temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3];
- temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k];
- temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1];
- temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2];
- temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3];
- }
- if (j==28){
- for (l=0; l<32; l++){
- C[row+l]+=temp[l];
- C[row2+l]+=temp2[l];
- temp[l]=0;
- temp2[l]=0;
- }
- }
- }
- }
- }
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArray( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- /*size_t i, j, k;
- int row, column, column2, column3, column4, column5, column6, column7, column8;
- size_t max_dim = 32*32;
- data_t element, element2, element3, element4, element5, element6, element7, element8;
- data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){
- element=A[i];
- element2 = A[i+1];
- element3 = A[i+2];
- element4 = A[i+3];
- element5 = A[i+4];
- element6 = A[i+5];
- element7 = A[i+6];
- element8 = A[i+7];
- row= (int)(i/32)*32;
- column = i%32*32;
- column2 = (i+1)%32*32;
- column3 = (i+2)%32*32;
- column4 = (i+3)%32*32;
- column5 = (i+4)%32*32;
- column6 = (i+5)%32*32;
- column7 = (i+6)%32*32;
- column8 = (i+7)%32*32;
-
- for (j=0; j<32; j+=8){
- temp_mat[j]+=element*B[column+j]+element2*B[column2+j]+element3*B[column3+j]+element4*B[column4+j]+element5*B[column5+j]+element6*B[column6+j]+element7*B[column7+j]+element8*B[column8+j];
- temp_mat[j+1]+=element*B[column+j+1]+element2*B[column2+j+1]+element3*B[column3+j+1]+element4*B[column4+j+1]+element5*B[column5+j+1]+element6*B[column6+j+1]+element7*B[column7+j+1]+element8*B[column8+j+1];
- temp_mat[j+2]+=element*B[column+j+2]+element2*B[column2+j+2]+element3*B[column3+j+2]+element4*B[column4+j+2]+element5*B[column5+j+2]+element6*B[column6+j+2]+element7*B[column7+j+2]+element8*B[column8+j+2];
- temp_mat[j+3]+=element*B[column+j+3]+element2*B[column2+j+3]+element3*B[column3+j+3]+element4*B[column4+j+3]+element5*B[column5+j+3]+element6*B[column6+j+3]+element7*B[column7+j+3]+element8*B[column8+j+3];
- temp_mat[j+4]+=element*B[column+j+4]+element2*B[column2+j+4]+element3*B[column3+j+4]+element4*B[column4+j+4]+element5*B[column5+j+4]+element6*B[column6+j+4]+element7*B[column7+j+4]+element8*B[column8+j+4];
- temp_mat[j+5]+=element*B[column+j+5]+element2*B[column2+j+5]+element3*B[column3+j+5]+element4*B[column4+j+5]+element5*B[column5+j+5]+element6*B[column6+j+5]+element7*B[column7+j+5]+element8*B[column8+j+5];
- temp_mat[j+6]+=element*B[column+j+6]+element2*B[column2+j+6]+element3*B[column3+j+6]+element4*B[column4+j+6]+element5*B[column5+j+6]+element6*B[column6+j+6]+element7*B[column7+j+6]+element8*B[column8+j+6];
- temp_mat[j+7]+=element*B[column+j+7]+element2*B[column2+j+7]+element3*B[column3+j+7]+element4*B[column4+j+7]+element5*B[column5+j+7]+element6*B[column6+j+7]+element7*B[column7+j+7]+element8*B[column8+j+7];
- }
- if (i%32==24){
- for(k=0; k<32; k++){
- C[row+k]=temp_mat[k];
- temp_mat[k]=0;
- }
- }
- }*/
- int i,j,k,l;
- data_t element1, element2, element3, element4, element5, element6, element7, element8;
- int row, row2;
- int column1, column2, column3, column4, column5, column6, column7, column8;
- data_t temp[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- data_t temp2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- if (coreid == 0){
- for (i=0; i<32; i+=2){
- row = i*32;
- row2 = (i+1)*32;
- for (j=0; j<16; j+=4){
- element1 = A[row+j];
- element2 = A[row+j+1];
- element3 = A[row+j+2];
- element4 = A[row+j+3];
- column1 = j*32;
- column2 = (j+1)*32;
- column3 = (j+2)*32;
- column4 = (j+3)*32;
- element5 = A[row2+j];
- element6 = A[row2+j+1];
- element7 = A[row2+j+2];
- element8 = A[row2+j+3];
-
- for (k=0; k<32; k+=4){
- temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k];
- temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1];
- temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2];
- temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3];
- temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k];
- temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1];
- temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2];
- temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3];
- }
- if (j==12){
- for (l=0; l<32; l++){
- C[row+l]+=temp[l];
- C[row2+l]+=temp2[l];
- temp[l]=0;
- temp2[l]=0;
- }
- }
- }
- }
- }
- else if (coreid==1){
- for (i=0; i<32; i+=2){
- row = (31-i)*32;
- row2 = (31-i-1)*32;
- for (j=16; j<32; j+=4){
- element1 = A[row+j];
- element2 = A[row+j+1];
- element3 = A[row+j+2];
- element4 = A[row+j+3];
- element5 = A[row2+j];
- element6 = A[row2+j+1];
- element7 = A[row2+j+2];
- element8 = A[row2+j+3];
- column1 = j*32;
- column2 = (j+1)*32;
- column3 = (j+2)*32;
- column4 = (j+3)*32;
- for (k=0; k<32; k+=4){
- temp[k]+=element1*B[column1+k]+element2*B[column2+k]+element3*B[column3+k]+element4*B[column4+k];
- temp[k+1]+=element1*B[column1+k+1]+element2*B[column2+k+1]+element3*B[column3+k+1]+element4*B[column4+k+1];
- temp[k+2]+=element1*B[column1+k+2]+element2*B[column2+k+2]+element3*B[column3+k+2]+element4*B[column4+k+2];
- temp[k+3]+=element1*B[column1+k+3]+element2*B[column2+k+3]+element3*B[column3+k+3]+element4*B[column4+k+3];
- temp2[k]+=element5*B[column1+k]+element6*B[column2+k]+element7*B[column3+k]+element8*B[column4+k];
- temp2[k+1]+=element5*B[column1+k+1]+element6*B[column2+k+1]+element7*B[column3+k+1]+element8*B[column4+k+1];
- temp2[k+2]+=element5*B[column1+k+2]+element6*B[column2+k+2]+element7*B[column3+k+2]+element8*B[column4+k+2];
- temp2[k+3]+=element5*B[column1+k+3]+element6*B[column2+k+3]+element7*B[column3+k+3]+element8*B[column4+k+3];
- }
- if (j==28){
- for (l=0; l<32; l++){
- C[row+l]+=temp[l];
- C[row2+l]+=temp2[l];
- temp[l]=0;
- temp2[l]=0;
- }
- }
- }
- }
- }
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- // Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
-
-
- // verify
- verify(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier();
-
-
- // Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
-
-#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- size_t i, j, k, l;
- int row,row2, column, column2, column3, column4, column5, column6, column7, column8;
- size_t max_dim = 32*32;
- data_t element, element2, element3, element4, element5, element6, element7, element8;
- data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){
- for (l=coreid*32/ncores; l<32*(1+coreid)/ncores; l+=2){
- row=l*32;
- row2=(l+1)*32;
- for (i=0; i<lda; i+=4){
- element = A[row+i];
- element2 = A[row+i+1];
- element3 = A[row+i+2];
- element4 = A[row+i+3];
- element5 = A[row2+i];
- element6 = A[row2+i+1];
- element7 = A[row2+i+2];
- element8 = A[row2+i+3];
- column=i*32;
- column2=(i+1)*32;
- column3=(i+2)*32;
- column4=(i+3)*32;
- for (j=0; j<32; j+=4){
- temp_mat[j]+=element*B[column+j]+element2*B[column2+j]+element3*B[column3+j]+element4*B[column4+j];
- temp_mat[j+1]+=element*B[column+j+1]+element2*B[column2+j+1]+element3*B[column3+j+1]+element4*B[column4+j+1];
- temp_mat[j+2]+=element*B[column+j+2]+element2*B[column2+j+2]+element3*B[column3+j+2]+element4*B[column4+j+2];
- temp_mat[j+3]+=element*B[column+j+3]+element2*B[column2+j+3]+element3*B[column3+j+3]+element4*B[column4+j+3];
- temp_mat2[j]+=element5*B[column+j]+element6*B[column2+j]+element7*B[column3+j]+element8*B[column4+j];
- temp_mat2[j+1]+=element5*B[column+j+1]+element6*B[column2+j+1]+element7*B[column3+j+1]+element8*B[column4+j+1];
- temp_mat2[j+2]+=element5*B[column+j+2]+element6*B[column2+j+2]+element7*B[column3+j+2]+element8*B[column4+j+2];
- temp_mat2[j+3]+=element5*B[column+j+3]+element6*B[column2+j+3]+element7*B[column3+j+3]+element8*B[column4+j+3];
- }
- /*if (i==28){
- for(k=0; k<32; k++){
- C[row+k]=temp_mat[k];
- C[row2+k]=temp_mat2[k];
- temp_mat[k]=0;
- temp_mat2[k]=0;
- }
- }*/
- }
- for(k=0; k<32; k++){
- C[row+k]=temp_mat[k];
- C[row2+k]=temp_mat2[k];
- temp_mat[k]=0;
- temp_mat2[k]=0;
- }
- }
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArray( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- size_t i, j, k, l;
- int row,row2, column, column2, column3, column4, column5, column6, column7, column8;
- size_t max_dim = 32*32;
- data_t element, element2, element3, element4, element5, element6, element7, element8;
- data_t temp_mat[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- data_t temp_mat2[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- //for (i=coreid*max_dim/ncores; i<(max_dim/ncores+coreid*max_dim/ncores); i+=8){
- for (l=coreid*32/ncores; l<32*(1+coreid)/ncores; l+=2){
- row=l*32;
- row2=(l+1)*32;
- for (i=0; i<lda; i+=4){
- element = A[row+i];
- element2 = A[row+i+1];
- element3 = A[row+i+2];
- element4 = A[row+i+3];
- element5 = A[row2+i];
- element6 = A[row2+i+1];
- element7 = A[row2+i+2];
- element8 = A[row2+i+3];
- column=i*32;
- column2=(i+1)*32;
- column3=(i+2)*32;
- column4=(i+3)*32;
- for (j=0; j<32; j+=4){
- temp_mat[j]+=element*B[column+j]+element2*B[column2+j]+element3*B[column3+j]+element4*B[column4+j];
- temp_mat[j+1]+=element*B[column+j+1]+element2*B[column2+j+1]+element3*B[column3+j+1]+element4*B[column4+j+1];
- temp_mat[j+2]+=element*B[column+j+2]+element2*B[column2+j+2]+element3*B[column3+j+2]+element4*B[column4+j+2];
- temp_mat[j+3]+=element*B[column+j+3]+element2*B[column2+j+3]+element3*B[column3+j+3]+element4*B[column4+j+3];
- temp_mat2[j]+=element5*B[column+j]+element6*B[column2+j]+element7*B[column3+j]+element8*B[column4+j];
- temp_mat2[j+1]+=element5*B[column+j+1]+element6*B[column2+j+1]+element7*B[column3+j+1]+element8*B[column4+j+1];
- temp_mat2[j+2]+=element5*B[column+j+2]+element6*B[column2+j+2]+element7*B[column3+j+2]+element8*B[column4+j+2];
- temp_mat2[j+3]+=element5*B[column+j+3]+element6*B[column2+j+3]+element7*B[column3+j+3]+element8*B[column4+j+3];
- }
- if (i==28){
- for(k=0; k<32; k++){
- C[row+k]=temp_mat[k];
- C[row2+k]=temp_mat2[k];
- temp_mat[k]=0;
- temp_mat2[k]=0;
- }
- }
- }
- }
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- // Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
-
-
- // verify
- verify(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier();
-
-
- // Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
-
-#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- size_t i;
- for (i= coreid*n/ncores; i<(n/ncores+coreid*n/ncores); i++){
- x[i] = x[i] + y[i];
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+//--------------------------------------------------------------------------
+// Multi-threaded matrix multiply: C += A * B.
+//
+// A, B and C are lda x lda matrices stored as flat arrays and indexed as
+// [row*lda + col]. The rows of C are split into contiguous bands of
+// lda/ncores rows, and core `coreid` computes band number `coreid`. When
+// lda is not a multiple of ncores, the highest-numbered core also computes
+// the leftover rows at the bottom of the matrix.
+//
+//   coreid - index of the calling core, 0 .. ncores-1
+//   ncores - number of cores taking part (must be non-zero)
+//   lda    - matrix dimension
+//   A, B   - input matrices (read only)
+//   C      - output matrix; results are accumulated with +=, so the caller
+//            supplies its initial contents
+//
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+  int i, j, k;
+  int rows_per_core, first, synced_end, end;
+  data_t a;   // element of A; kept as data_t so fractional values are not truncated
+
+  rows_per_core = lda / ncores;
+  first = coreid * rows_per_core;
+  synced_end = first + rows_per_core;
+
+  // The last core picks up the lda % ncores rows that the even split misses.
+  end = (coreid == ncores - 1) ? lda : synced_end;
+
+  for ( j = first; j < end; j++ )
+  {
+    for ( k = 0; k < lda; k++ )
+    {
+      // A[j][k] is invariant across the inner loop, so load it once.
+      a = A[j*lda + k];
+
+      for ( i = 0; i < lda; i++ )
+      {
+        C[i + j*lda] += a * B[k*lda + i];
+      }
+
+      // NOTE(review): barrier() is defined outside this file; presumably
+      // every core has to call it the same number of times. That holds for
+      // the evenly divided rows (rows_per_core * lda calls per core), so the
+      // leftover rows handled by the last core deliberately skip it.
+      if ( j < synced_end )
+        barrier(ncores);
+    }
+  }
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( j = 0; j < lda; j++ )
- for ( k = 0; k < lda; k++ )
- {
- for ( i = 0; i < lda; i++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- int i, j, k, limit, end, kblock, iblock, r, jblock;
- int tempA1;
- int tempB1;
-
- limit = lda / 2;
- if (coreid == 0){
- j = 0;
- end = limit;
- } else {
- j = limit;
- end = lda;
- }
-
- kblock = 1;
- iblock = 1;
- jblock = 1;
- for (; j < end; j+= jblock)
- for ( k = 0; k < lda; k = k + kblock )
- {
- r = j*lda + k;
- tempA1 = A[r];
-
- for ( i = 0; i < lda; i = i + iblock ) {
- tempB1 = k*lda + i;
-
- C[i + j*lda] += tempA1*B[tempB1];
-
- }
- }
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( j = 0; j < lda; j++ )
- for ( k = 0; k < lda; k++ )
- {
- for ( i = 0; i < lda; i++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- int i, j, k, limit, end, kblock, iblock, r, jblock;
- int tempA1;
- int tempB1;
-
- limit = lda / 2;
- if (coreid == 0){
- j = 0;
- end = limit;
- } else {
- j = limit;
- end = lda;
- }
-
- kblock = 1;
- iblock = 1;
- jblock = 1;
- for (; j < end; j+= jblock)
- for ( k = 0; k < lda; k = k + kblock )
- {
- r = j*lda + k;
- tempA1 = A[r];
-
- for ( i = 0; i < lda; i = i + iblock ) {
- tempB1 = k*lda + i;
-
- C[i + j*lda] += tempA1*B[tempB1];
-
- }
- }
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+//--------------------------------------------------------------------------
+// matmul: multi-threaded matrix multiply, partitioned by rows of C.
+//
+// Accumulates A * B into C for lda x lda matrices stored as flat arrays
+// (element (row j, column i) lives at index j*lda + i). Every core calls
+// this function with its own coreid; each core updates a disjoint,
+// contiguous range of rows of C, so cores never write the same element.
+//
+//   coreid : index of the calling core, expected in [0, ncores)
+//   ncores : total number of cores taking part; must be positive
+//   lda    : dimension of the square matrices
+//   A, B   : input matrices (read only)
+//   C      : output matrix; results are added (+=) to its current
+//            contents, so the caller must clear it beforehand
+//
+// Rows are split as evenly as possible: every core gets lda / ncores rows
+// and the lda % ncores leftover rows are given, one each, to the
+// lowest-numbered cores. When lda is a multiple of ncores this is exactly
+// the plain block partition.
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+
+   // ***************************** //
+   // **** ADD YOUR CODE HERE ***** //
+   // ***************************** //
+   //
+   // feel free to make a separate function for MI and MSI versions.
+   int i, j, k;
+   int block = lda / ncores;
+   int leftover = lda % ncores;
+
+   // Cores with coreid < leftover own one extra row; all cores after them
+   // have their starting row shifted by the number of extra rows handed
+   // out before them.
+   int extra_before = (coreid < leftover) ? coreid : leftover;
+   int start = block * coreid + extra_before;
+   int end = start + block + ((coreid < leftover) ? 1 : 0);
+
+   for ( j = start; j < end; j++ )
+      for ( k = 0; k < lda; k++ )
+      {
+         // i is innermost so that B and C are walked with unit stride
+         for ( i = 0; i < lda; i++ )
+         {
+            C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
+         }
+      }
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student: ME STEPHANIE TUNG
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
- int i, j, k, ii, jj, kk;
- int block = lda / ncores;
- int leftover = lda % ncores;
- int start = block * coreid;
-
-
-
- for ( j = start; j < (start+block); j++ )
- for ( k = 0; k < lda; k++ )
- {
- for ( i = 0; i < lda; i++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-/*
-
-
- for ( j = coreid; j < lda; j += ncores )
- for ( k = 0; k < lda; k++ )
- {
- for ( i = 0; i < lda; i++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-/*
- if (coreid > 0) {
- return;
- }
-
- for ( j = (lda - leftover); j < lda; j++ )
- for ( i = 0; i < lda; i++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-
-
-if (coreid > 0) {
- return;
-}
-
-
-
-for (jj = start; jj < start+block; jj += 4) {
- for (kk = 0; kk < lda; kk += 4) {
- for (ii = 0; ii < lda; ii += 4) {
- for (i = ii; i < ii+4; i += 4) {
- //float * p = B + i;
- for (j = jj; j < jj+4; j++) {
- for (k = kk; k < kk+4; k++) {
-
- float a = A[k + j*lda];
-
- C[i + j*lda] += a * B[k*lda + i];
- C[i + j*lda + 1] += a * B[k*lda + i + 1];
- C[i + j*lda + 2] += a * B[k*lda + i + 2];
- C[i + j*lda + 3] += a * B[k*lda + i + 3];
- }
- }
- }
- }
- }
-}
-
-*/
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student: ME STEPHANIE TUNG
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
- int i, j, k, ii, jj, kk;
- int block = lda / ncores;
- int leftover = lda % ncores;
- int start = block * coreid;
-
-
-
- for ( j = start; j < (start+block); j++ )
- for ( k = 0; k < lda; k++ )
- {
- for ( i = 0; i < lda; i++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-/*
-
-
- for ( j = coreid; j < lda; j += ncores )
- for ( k = 0; k < lda; k++ )
- {
- for ( i = 0; i < lda; i++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-/*
- if (coreid > 0) {
- return;
- }
-
- for ( j = (lda - leftover); j < lda; j++ )
- for ( i = 0; i < lda; i++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-
-
-if (coreid > 0) {
- return;
-}
-
-
-
-for (jj = start; jj < start+block; jj += 4) {
- for (kk = 0; kk < lda; kk += 4) {
- for (ii = 0; ii < lda; ii += 4) {
- for (i = ii; i < ii+4; i += 4) {
- //float * p = B + i;
- for (j = jj; j < jj+4; j++) {
- for (k = kk; k < kk+4; k++) {
-
- float a = A[k + j*lda];
-
- C[i + j*lda] += a * B[k*lda + i];
- C[i + j*lda + 1] += a * B[k*lda + i + 1];
- C[i + j*lda + 2] += a * B[k*lda + i + 2];
- C[i + j*lda + 3] += a * B[k*lda + i + 3];
- }
- }
- }
- }
- }
-}
-
-*/
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
-
- size_t i, j;
-
- size_t blocksize = n / ncores;
- size_t start = coreid * blocksize;
- size_t leftover = n % ncores;
-
-// int i, j;
-
- for (i = start; i < (start + blocksize); i++) {
- x[i] = x[i] + y[i];
- }
-
- for (j = (n - leftover) + coreid; j < n; j += ncores) {
- x[j] = x[j] + y[j];
- }
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+//--------------------------------------------------------------------------
+// multi-threaded matmul
+//
+// Accumulates the product of two lda x lda matrices into C, using the same
+// indexing as the rest of the benchmark: for every (i, j),
+//
+//   C[i + j*lda] += sum over k of A[j*lda + k] * B[k*lda + i]
+//
+// Work is split over the index i. Each core starts at i = coreid and advances
+// by 2*ncores, handling both i and i+ncores on each pass so that one load of
+// A[j*lda + k] feeds two multiply-accumulates.
+//
+//   coreid - index of the calling core (presumably in [0, ncores); confirm
+//            against the caller)
+//   ncores - number of cores sharing the work
+//   lda    - dimension of the square matrices
+//   A, B   - input matrices, only read
+//   C      - output matrix; products are added to its existing contents
+//
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+   // A non-positive stride would never advance (or would walk backwards), and
+   // a negative start would index in front of the arrays.
+   if (ncores <= 0 || coreid < 0)
+      return;
+
+   for (int i = coreid; i < lda; i += ncores*2)
+   {
+      // The paired index only exists while it is still inside the matrix.
+      // Without this check, an lda that is not a multiple of 2*ncores makes
+      // the last pass touch elements of the next row and run off the end of
+      // B and C.
+      const int i2 = i + ncores;
+      const int has_pair = (i2 < lda);
+
+      for (int j = 0; j < lda; j++)
+      {
+         for (int k = 0; k < lda; k++)
+         {
+            // Keep the operands in data_t: narrowing them to int would
+            // truncate non-integral values and is undefined when the value
+            // does not fit in an int.
+            const data_t a = A[j*lda + k];
+
+            C[i + j*lda] += a * B[k*lda + i];
+            if (has_pair)
+               C[i2 + j*lda] += a * B[k*lda + i2];
+         }
+      }
+   }
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
- for (int i = coreid; i < lda; i+=ncores*2)
- {
- for (int j = 0; j < lda; j++)
- {
- for (int k = 0; k < lda; k++)
- {
- int A12 = A[j*lda + k];
- int B1 = B[k*lda + i];
- int B2 = B[k*lda + i + ncores];
- C[i+j*lda] += A12 * B1;
- C[i+ncores+j*lda] += A12 * B2;
- //C[i+j*lda] += A[j*lda +k] * B[k*lda +i];
- }
- }
- }
-}
-
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
- for (int i = coreid; i < lda; i+=ncores)
- {
- for (int j = 0; j < lda; j++)
- {
- for (int k = 0; k < lda; k++)
- {
- //int A12 = A[j*lda + k];
- //int B1 = B[k*lda + i];
- //int B2 = B[k*lda + i + ncores];
- //C[i+j*lda] += A12 * B1;
- //C[i+ncores+j*lda] += A12 * B2;
- C[i+j*lda] += A[j*lda +k] * B[k*lda +i];
- }
- }
- }
-}
-
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
-
- size_t i;
-
- for (i = coreid; i < n; i+=ncores*2)
- {
- //int x1 = x[i];
- //int x2 = x[i+ncores];
- //int x3 = x[i+ncores*2];
- //int x4 = x[i+ncores*4];
- //int y1 = y[i];
- //int y2 = y[i+ncores];
- //int y3 = y[i+ncores*2];
- //int y4 = y[i+ncores*4];
- int x1 = x[i];
- int x2 = x[i+ncores];
- int y1 = y[i];
- int y2 = y[i+ncores];
- x[i] = x1 + y1;
- x[i+ncores] = x2 + y2;
- //x[i+ncores*2] = x[i+ncores*2] + y[i+ncores*2];
- // x[i+ncores*4] = x[i+ncores*4] + y[i+ncores*4];
- //x[i] = x1 + y1;
- //x[i+ncores] = x2 + y2;
- //x[i+ncores*2] = x3 + y3;
- //x[i+ncores*4] = x4 + y4;
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+  // Multi-threaded matrix multiply: C += A * B for row-major lda x lda
+  // matrices.
+  //
+  //   coreid - index of the calling core, expected in [0, ncores)
+  //   ncores - number of cores cooperating on this product (must be > 0)
+  //   lda    - dimension of the square matrices
+  //   A, B   - input matrices (read-only)
+  //   C      - output matrix; products are accumulated into it, so the
+  //            caller must have initialised it
+  //
+  // Each core owns a contiguous band of rows of C, so no two cores ever
+  // write the same element.
+  int i, j, k;
+
+  // Split the rows as evenly as possible. Multiplying before dividing keeps
+  // the bands contiguous and covers every row even when lda is not a
+  // multiple of ncores (a plain lda/ncores stride would drop the remainder
+  // rows). When lda is a multiple of ncores this is the same split as
+  // coreid*(lda/ncores).
+  const int row_begin = (coreid * lda) / ncores;
+  int row_end = ((coreid + 1) * lda) / ncores;
+  if (row_end > lda)
+    row_end = lda;
+
+  for ( i = row_begin; i < row_end; i++ )
+  {
+    const data_t *a_row = &A[i*lda];
+    data_t *c_row = &C[i*lda];
+
+    // i-k-j order: the innermost loop walks a row of B and a row of C with
+    // unit stride, so no transposed copy of B is needed. That removes the
+    // fixed-size int scratch buffer, which truncated data_t values and
+    // overflowed for lda > 32. Each C element still accumulates its terms
+    // in increasing k order.
+    for ( k = 0; k < lda; k++ )
+    {
+      const data_t a_ik = a_row[k];
+      const data_t *b_row = &B[k*lda];
+      for ( j = 0; j < lda; j++ )
+      {
+        c_row[j] += a_ik * b_row[j];
+      }
+    }
+  }
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
- int i, j, k, B_t[32*32], x, y;
- int ALoc, BLoc, CLoc;
-// int ii = 0, done = 0;
- //for(x = coreid*(lda/ncores); x < (coreid+1)*(lda/ncores) && x < lda; x++) {
- for (x = 0; x < lda; x++) {
- for(y = 0; y < lda; y++) {
- B_t[y*lda + x] = B[x*lda + y];
- }
- }
- // for ( ii = lda/4 ; ii < lda ; ii += lda/4)
- //{
-// for ( i = coreid*(ii/ncores); i < (coreid+1)*(ii/ncores) && i < ii; i++ )
- for ( i = coreid*(lda/ncores); i < (coreid+1)*(lda/ncores) && i < lda; i++ )
- {
- ALoc = i*lda;
- for ( j = 0; j < lda; j++ )
- {
- BLoc = j*lda;
- CLoc = i*lda + j;
- for ( k = 0; k < lda; k++ )
- {
- C[CLoc] += A[ALoc + k] * B_t[BLoc + k];
- }
- }
- }
- //}
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-// Id of the core running the current thread. Declared __thread, so each
-// thread has its own copy; assigned from cid in thread_entry().
-__thread unsigned long coreid;
-// Number of cores taking part; assigned from nc in thread_entry().
-unsigned long ncores;
-
-#include "util.h"
-
-// stringify(s) macro-expands s and then turns the result into a string
-// literal (stringify_1 alone would stringify the unexpanded argument).
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-// stats(code): runs `code` between two readings of rdcycle() and rdinstret()
-// (both defined outside this file). Each counter is first stored negated and
-// the second reading is then added, which leaves the elapsed count in _c
-// (cycles) and _i (instructions). Core 0 then prints the stringified code,
-// the cycle count, cycles per element (_c / DATA_SIZE) and cycles per
-// instruction (_c / _i); each ratio gets one decimal digit computed with
-// integer arithmetic (10*x/y % 10).
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-// Print `name` followed by the first n elements of arr (each cast to long)
-// on one line. Only core 0 prints; every other core returns without output.
-void printArrayMT( char name[], int n, data_t arr[] )
-{
-  if (coreid == 0)
-  {
-    printf( " %10s :", name );
-    for ( int idx = 0; idx < n; ++idx )
-      printf( " %4ld ", (long) arr[idx] );
-    printf( "\n" );
-  }
-}
-
-// Compare the first n elements of `test` against `correct`. Runs on core 0
-// only; every other core returns immediately. On the first mismatch the index
-// and both values (cast to long) are printed and the program exits with
-// status -1. Returns normally when all n elements match.
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
-  if (coreid != 0)
-    return;
-
-  size_t i;
-  for (i = 0; i < n; i++)
-  {
-    if (test[i] != correct[i])
-    {
-      // i is a size_t: cast it so the argument matches its conversion
-      // specifier. Passing a size_t to %d is undefined behaviour wherever
-      // size_t and int differ in size.
-      printf("FAILED test[%ld]= %4ld, correct[%ld]= %4ld\n",
-             (long) i, (long) test[i], (long) i, (long)correct[i]);
-      exit(-1);
-    }
-  }
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-// In-place vector add: x[i] = x[i] + y[i]. Elements are interleaved across
-// the cores: this core handles indices coreid, coreid + ncores,
-// coreid + 2*ncores, ... below n.
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
-  size_t idx = coreid;
-
-  while (idx < n)
-  {
-    x[idx] += y[idx];
-    idx += ncores;
-  }
-}
-
-// In-place vector add (x[i] = x[i] + y[i]) where each core processes one
-// contiguous chunk of n / ncores elements starting at coreid * (n / ncores).
-// The last core additionally takes the n % ncores leftover elements, so no
-// element is skipped when n is not a multiple of ncores.
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
-  const size_t chunk = n / ncores;
-  const size_t first = coreid * chunk;
-  const size_t last  = (coreid + 1 == ncores) ? n : first + chunk;
-
-  // size_t index: the bounds are unsigned, so a signed int index would be
-  // converted on every comparison and could not cover large n.
-  for (size_t i = first; i < last; i++)
-  {
-    x[i] = x[i] + y[i];
-  }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- // Publish this thread's core id and the core count through the file-level
- // variables that the helpers and kernels in this file read.
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
- 
- // Only core 0 performs the copy.
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- 
-
- // Execute the provided, terrible vvadd
- // NOTE(review): barrier() is defined outside this file; presumably it holds
- // each core until all nc cores have arrived, so that no core starts the
- // kernel before core 0 has finished the copy above -- confirm in util.h.
- barrier(nc);
- // The trailing barrier(nc) is part of the timed code passed to stats().
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
- 
-
- // verify
- // (verifyMT does its work on core 0 only and exits the program on a mismatch)
- verifyMT(DATA_SIZE, results_data, verify_data);
- 
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
- 
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
- // Optional dump of the computed and expected arrays (core 0 only).
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
- 
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- // Every core that reaches this point calls exit(0).
- exit(0);
-}
-
+++ /dev/null
-
-// Number of elements in each of the input1_data, input2_data and verify_data arrays.
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-# Print the command-line help text followed by $usageMsg, then exit.
-sub usage()
-{
-  print join( "",
-    "\n",
-    " Usage: vvadd_gendata.pl [options] \n",
-    "\n",
-    " Options:\n",
-    " --help print this message\n",
-    " --size size of input data [1000]\n",
-    " --seed random seed [1]\n",
-    "$usageMsg" );
-
-  exit();
-}
-
-# Store the default options in %opts, then let Getopt::Long override them
-# from the command line. usage() is called (and exits) when parsing fails or
-# when --help was given.
-sub processCommandLine()
-{
-  @opts{ "help", "size", "seed" } = ( 0, 1000, 1 );
-
-  unless ( Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) ) {
-    usage();
-  }
-  usage() if $opts{"help"};
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-# Print the array referenced by the second argument as a C definition
-#   static data_t <name>[DATA_SIZE] = { ... };
-# with at most 20 values per line, each formatted with "%3.2f". Every value
-# except the very last one is followed by ", ". An empty array still produces
-# one (empty) row between the braces.
-sub printArray
-{
-  my ($arrayName, $arrayRef) = @_;
-
-  my $perRow  = 20;
-  my $count   = scalar(@{$arrayRef});
-  my $lastIdx = $count - 1;
-
-  print "static data_t ".$arrayName."[DATA_SIZE] = \n";
-  print "{\n";
-
-  # Emit one row per pass; do/while guarantees at least one row.
-  my $start = 0;
-  do {
-    my $stop = $start + $perRow;
-    $stop = $count if ( $stop > $count );
-
-    print " ";
-    foreach my $index ( $start .. $stop-1 ) {
-      print sprintf("%3.2f",$arrayRef->[$index]);
-      print ", " if ( $index != $lastIdx );
-    }
-    print "\n";
-
-    $start = $stop;
-  } while ( $start < $count );
-
-  print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-# Print the DATA_SIZE macro followed by two random input vectors (integers
-# in 0..18) and their element-wise sum as C arrays.
-sub main()
-{
-  processCommandLine();
-  srand($opts{"seed"});
-
-  my (@values1, @values2, @sum);
-  foreach my $n ( 1 .. $opts{"size"} ) {
-    # two separate statements: keeps the order of the rand() calls fixed
-    my $first  = int(rand(19));
-    my $second = int(rand(19));
-    push( @values1, $first );
-    push( @values2, $second );
-    push( @sum, $first + $second );
-  }
-
-  print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
-  printArray( "input1_data", \@values1 );
-  printArray( "input2_data", \@values2 );
-  printArray( "verify_data", \@sum );
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-// Id of the core running the current thread. Declared __thread, so each
-// thread has its own copy; assigned from cid in thread_entry().
-__thread unsigned long coreid;
-// Number of cores taking part; assigned from nc in thread_entry().
-unsigned long ncores;
-
-#include "util.h"
-
-// stringify(s) macro-expands s and then turns the result into a string
-// literal (stringify_1 alone would stringify the unexpanded argument).
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-// stats(code): runs `code` between two readings of rdcycle() and rdinstret()
-// (both defined outside this file). Each counter is first stored negated and
-// the second reading is then added, which leaves the elapsed count in _c
-// (cycles) and _i (instructions). Core 0 then prints the stringified code,
-// the cycle count, cycles per inner-loop iteration (_c divided by DIM_SIZE
-// three times) and cycles per instruction (_c / _i); each ratio gets one
-// decimal digit computed with integer arithmetic.
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-// Print `name` followed by the first n elements of arr (each cast to long)
-// on one line. Only core 0 prints; every other core returns without output.
-void printArrayMT( char name[], int n, data_t arr[] )
-{
-  if (coreid == 0)
-  {
-    int idx = 0;
-
-    printf( " %10s :", name );
-    while ( idx < n )
-    {
-      printf( " %3ld ", (long) arr[idx] );
-      ++idx;
-    }
-    printf( "\n" );
-  }
-}
-
-// Compare the first n elements of `test` against `correct`. Runs on core 0
-// only; every other core returns immediately. On the first mismatch the index
-// and both values (cast to long) are printed and the program exits with
-// status -1. Returns normally when all n elements match.
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
-  if (coreid != 0)
-    return;
-
-  size_t i;
-  for (i = 0; i < n; i++)
-  {
-    if (test[i] != correct[i])
-    {
-      // i is a size_t: cast it so the argument matches its conversion
-      // specifier. Passing a size_t to %d is undefined behaviour wherever
-      // size_t and int differ in size.
-      printf("FAILED test[%ld]= %3ld, correct[%ld]= %3ld\n",
-             (long) i, (long)test[i], (long) i, (long)correct[i]);
-      exit(-1);
-    }
-  }
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// Single-threaded, naive matmul: executed by core 0 only, all other cores
-// return at once. For every i, j, k in [0, lda) it performs
-//   C[i + j*lda] += A[j*lda + k] * B[k*lda + i]
-// i.e. the products are added to whatever C already holds; C is not cleared.
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-  if (coreid == 0)
-  {
-    int col, row, inner;
-
-    for ( col = 0; col < lda; ++col )
-    {
-      for ( row = 0; row < lda; ++row )
-      {
-        data_t *dst = &C[col + row*lda];
-
-        for ( inner = 0; inner < lda; ++inner )
-          *dst += A[row*lda + inner] * B[inner*lda + col];
-      }
-    }
-  }
-}
-
-
-
-// Multi-threaded matmul of two lda x lda matrices, using the same element
-// layout as matmul_naive (C[i + j*lda] is built from row j of A and column i
-// of B). Unlike matmul_naive, C is overwritten rather than accumulated into.
-//
-// B is first copied, transposed, into a scratch buffer so that the innermost
-// loop reads both operands with consecutive indices. Rows of C are then dealt
-// out to the cores two at a time (rows j and j + ncores), so that two
-// accumulators share each pass over a row of the transposed B.
-//
-// Preconditions: lda * lda <= ARRAY_SIZE (capacity of the scratch buffer).
-// All ncores cores must call this function, because it contains a barrier.
-// The caller is expected to barrier again before reading C.
-//
-// Other loop orders and the untransposed variant were tried earlier and
-// measured slower on the 32x32 data set; they are no longer kept here as
-// commented-out code.
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-  // Function-scope static: a single buffer in the binary shared by all cores.
-  // (It was previously used without any declaration visible in this file.)
-  static data_t bTranspose[ARRAY_SIZE];
-
-  int i, j, k;
-
-  // Each core transposes its own share of the rows of B; together the cores
-  // write every element of bTranspose exactly once.
-  for ( j = coreid; j < lda; j += ncores )
-  {
-    for ( k = 0; k < lda; k++ )
-      bTranspose[lda*k + j] = B[lda*j + k];
-  }
-
-  // No core may read bTranspose before all cores have written their share.
-  // NOTE(review): relies on barrier() (defined outside this file) also making
-  // the other cores' stores visible -- confirm in util.h.
-  barrier(ncores);
-
-  for ( j = coreid; j < lda; j += 2*ncores )
-  {
-    // Second row of the pair. It used to be hard-coded as j+2, which is only
-    // correct for ncores == 2 and could index past the end of A and C.
-    const int j2 = j + ncores;
-    const int has_j2 = (j2 < lda);
-    const data_t *a1 = &A[j * lda];
-    // When there is no second row, alias row j so that the inner loop needs
-    // no branch; the duplicate result is simply not stored.
-    const data_t *a2 = has_j2 ? &A[j2 * lda] : a1;
-
-    for ( i = 0; i < lda; i++ )
-    {
-      const data_t *bt = &bTranspose[i * lda];
-      // Accumulators are locals: as shared globals they would be written by
-      // all cores at once.
-      data_t c1 = 0;
-      data_t c2 = 0;
-
-      for ( k = 0; k < lda; k++ )
-      {
-        c1 += a1[k] * bt[k];
-        c2 += a2[k] * bt[k];
-      }
-
-      C[i + j * lda] = c1;
-      if (has_j2)
-        C[i + j2 * lda] = c2;
-    }
-    // No barrier is needed here: each core writes rows of C that no other
-    // core touches.
-  }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static int c1;
-static int c2;
-//static int n;
-//static int m;
-static data_t bTranspose[DIM_SIZE*DIM_SIZE];
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
- int i, j, k, n, m, c1, c2;
-
- //matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 952596 cycles, 29.0 cycles/iter, 3.6 CPI
- //matmul(32, input1_data, input2_data, results_data); barrier(nc): 570135 cycles, 17.3 cycles/iter, 3.4 CPI
-
- for ( j = coreid; j < lda; j += 2*ncores ) {
- for ( i = 0; i < lda; i += 1 ){
- c1 = 0; //global vars c1, c2
- c2 = 0;
- for ( k = 0; k < lda; k += 1 ) {
- c1 += A[j * lda + k] * B[k*lda + i];
- c2 += A[(j+2) * lda + k] * B[k*lda + i];
-
- //barrier(nc);
- }
-
- C[i + j * lda] = c1;
- C[i + (j+2) * lda] = c2;
- barrier(nc);
- }
- //barrier(nc);
- }
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- size_t i;
-
- for (i = coreid; i < n; i += 2*ncores) {
- x[i] = x[i] + y[i];
- x[i+2] = x[i+2] + y[i+2];
- //barrier(nc);
- }
- barrier(ncores); //adding a barrier so there aren't any OOB errors due to faster threads
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+/*
+ * Multi-threaded matrix multiply on lda x lda matrices: C += A * B.
+ *
+ * Indexing: the element stored at C[j*lda + i] accumulates
+ * A[j*lda + k] * B[k*lda + i] for k = 0 .. lda-1, in increasing k order.
+ *
+ * Work split: each core owns the contiguous i range
+ * [coreid*lda/ncores, (coreid+1)*lda/ncores) and updates only those
+ * entries of C, so distinct cores never write the same element.
+ *
+ * Parameters:
+ *   coreid - index of the calling core, expected in 0 .. ncores-1
+ *   ncores - number of cores sharing the work (must be non-zero)
+ *   lda    - dimension of the square matrices
+ *   A, B   - input matrices, lda*lda elements each
+ *   C      - output matrix, lda*lda elements; products are added to the
+ *            values already stored there
+ */
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+  // Largest dimension the on-stack transposed copy of B can hold.
+  enum { MAX_TRANS_DIM = 32 };
+
+  // Slice of i owned by this core. Every loop below is bounded by iEnd, so
+  // a slice of any length (including an empty one) is handled correctly.
+  const int iBegin = coreid*lda/ncores;
+  const int iEnd = (coreid+1)*lda/ncores;
+  int i, j, k;
+
+  if ( lda > MAX_TRANS_DIM )
+  {
+    // The scratch buffer would overflow: read B in place instead.
+    for ( j = 0; j < lda; j++ )
+    {
+      const data_t *aRow = A + j*lda;
+      for ( i = iBegin; i < iEnd; i++ )
+      {
+        data_t sum = C[j*lda + i];
+        for ( k = 0; k < lda; k++ )
+        {
+          sum += aRow[k] * B[k*lda + i];
+        }
+        C[j*lda + i] = sum;
+      }
+    }
+    return;
+  }
+
+  // Private transposed copy of the part of B this core reads, laid out so
+  // that bTrans[i*lda + k] == B[k*lda + i]. The k loop of the kernel then
+  // walks both operands with unit stride. Only lines iBegin .. iEnd-1 are
+  // filled because no other line is read below.
+  data_t bTrans[MAX_TRANS_DIM*MAX_TRANS_DIM];
+  for ( k = 0; k < lda; k++ )
+  {
+    for ( i = iBegin; i < iEnd; i++ )
+    {
+      bTrans[i*lda + k] = B[k*lda + i];
+    }
+  }
+
+  for ( j = 0; j < lda; j++ )
+  {
+    const data_t *aRow = A + j*lda;
+    for ( i = iBegin; i < iEnd; i++ )
+    {
+      const data_t *bRow = bTrans + i*lda;
+      // Seed the accumulator with the current C value to keep "+=" semantics.
+      data_t sum = C[j*lda + i];
+      for ( k = 0; k < lda; k++ )
+      {
+        sum += aRow[k] * bRow[k];
+      }
+      C[j*lda + i] = sum;
+    }
+  }
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
-unsigned long _c = -rdcycle(), _i = -rdinstret(); \
-code; \
-_c += rdcycle(), _i += rdinstret(); \
-if (coreid == 0) \
-printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
-stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
-} while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- {
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
- }
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
- int i, j, k;
-
- /*547287
- for ( i = coreid*lda/ncores; i < (coreid+1)*lda/ncores; i++ )
- {
- for ( j = 0; j < lda; j++ )
- {
- int aIndex = j*lda;
- int cIndex = i + aIndex;
- C[cIndex] += A[aIndex] * B[i];
- C[cIndex] += A[aIndex + 1] * B[1*lda + i];
- C[cIndex] += A[aIndex + 2] * B[2*lda + i];
- C[cIndex] += A[aIndex + 3] * B[3*lda + i];
- C[cIndex] += A[aIndex + 4] * B[4*lda + i];
- C[cIndex] += A[aIndex + 5] * B[5*lda + i];
- C[cIndex] += A[aIndex + 6] * B[6*lda + i];
- C[cIndex] += A[aIndex + 7] * B[7*lda + i];
- C[cIndex] += A[aIndex + 8] * B[8*lda + i];
- C[cIndex] += A[aIndex + 9] * B[9*lda + i];
- C[cIndex] += A[aIndex + 10] * B[10*lda + i];
- C[cIndex] += A[aIndex + 11] * B[11*lda + i];
- C[cIndex] += A[aIndex + 12] * B[12*lda + i];
- C[cIndex] += A[aIndex + 13] * B[13*lda + i];
- C[cIndex] += A[aIndex + 14] * B[14*lda + i];
- C[cIndex] += A[aIndex + 15] * B[15*lda + i];
- C[cIndex] += A[aIndex + 16] * B[16*lda + i];
- C[cIndex] += A[aIndex + 17] * B[17*lda + i];
- C[cIndex] += A[aIndex + 18] * B[18*lda + i];
- C[cIndex] += A[aIndex + 19] * B[19*lda + i];
- C[cIndex] += A[aIndex + 20] * B[20*lda + i];
- C[cIndex] += A[aIndex + 21] * B[21*lda + i];
- C[cIndex] += A[aIndex + 22] * B[22*lda + i];
- C[cIndex] += A[aIndex + 23] * B[23*lda + i];
- C[cIndex] += A[aIndex + 24] * B[24*lda + i];
- C[cIndex] += A[aIndex + 25] * B[25*lda + i];
- C[cIndex] += A[aIndex + 26] * B[26*lda + i];
- C[cIndex] += A[aIndex + 27] * B[27*lda + i];
- C[cIndex] += A[aIndex + 28] * B[28*lda + i];
- C[cIndex] += A[aIndex + 29] * B[29*lda + i];
- C[cIndex] += A[aIndex + 30] * B[30*lda + i];
- C[cIndex] += A[aIndex + 31] * B[31*lda + i];
- }
- }
- */
-
- //492827
- /* for ( i = coreid*lda/ncores; i < (coreid+1)*lda/ncores; i++ )
- {
- for ( j = 0; j < lda; j++ )
- {
-
- int aIndex = j*lda;
- int cIndex = i + aIndex;
- for ( k = 0; k < lda; k++)
- {
- C[cIndex] += A[aIndex + k] * B[k*lda + i];
- /* C[cIndex] += A[aIndex + k+1] * B[(k+1)*lda + i];
- C[cIndex] += A[aIndex + k+2] * B[(k+2)*lda + i];
- C[cIndex] += A[aIndex + k+3] * B[(k+3)*lda + i];
- C[cIndex] += A[aIndex + k+4] * B[(k+4)*lda + i];
- C[cIndex] += A[aIndex + k+5] * B[(k+5)*lda + i];
- C[cIndex] += A[aIndex + k+6] * B[(k+6)*lda + i];
- C[cIndex] += A[aIndex + k+7] * B[(k+7)*lda + i];
- C[cIndex] += A[aIndex + k+8] * B[(k+8)*lda + i];
- C[cIndex] += A[aIndex + k+9] * B[(k+9)*lda + i];
- C[cIndex] += A[aIndex + k+10] * B[(k+10)*lda + i];
- C[cIndex] += A[aIndex + k+11] * B[(k+11)*lda + i];
- C[cIndex] += A[aIndex + k+12] * B[(k+12)*lda + i];
- C[cIndex] += A[aIndex + k+13] * B[(k+13)*lda + i];
- C[cIndex] += A[aIndex + k+14] * B[(k+14)*lda + i];
- C[cIndex] += A[aIndex + k+15] * B[(k+15)*lda + i];*/
- /* }
- }
- }*/
- /*
- //326378
- data_t bTrans[1024];
-
- for (int counti = 0; counti < 32; counti++) {
- for (int countj = 0; countj < 32; countj++) {
- *(bTrans + counti + countj*lda) = *(B + countj + counti*lda);
- }
- }
-
-
- int BLOCKSIZE = 8;
- for ( i = 0; i < lda; i+=BLOCKSIZE )
- {
- for ( int iTemp = i; iTemp < i + BLOCKSIZE; iTemp++ ) {
- int iFlag = iTemp*lda;
- for ( j = coreid*lda/ncores; j < (coreid+1)*lda/ncores; j++ ) {
- int jFlag = j*lda;
- int cLoc = jFlag+iTemp;
- for ( k = 0; k < lda; k+=8) {
- *(C+cLoc) += *(A+jFlag+k) * *(bTrans+iFlag+k);
- *(C+cLoc) += *(A+jFlag+k+1) * *(bTrans+iFlag+k+1);
- *(C+cLoc) += *(A+jFlag+k+2) * *(bTrans+iFlag+k+2);
- *(C+cLoc) += *(A+jFlag+k+3) * *(bTrans+iFlag+k+3);
- *(C+cLoc) += *(A+jFlag+k+4) * *(bTrans+iFlag+k+4);
- *(C+cLoc) += *(A+jFlag+k+5) * *(bTrans+iFlag+k+5);
- *(C+cLoc) += *(A+jFlag+k+6) * *(bTrans+iFlag+k+6);
- *(C+cLoc) += *(A+jFlag+k+7) * *(bTrans+iFlag+k+7);
- }
- }
- }
- }*/
- data_t bTrans[1024];
-
- for (int counti = 0; counti < 32; counti++) {
- for (int countj = 0; countj < 32; countj++) {
- *(bTrans + counti + countj*lda) = *(B + countj + counti*lda);
- }
- }
-
-
- int BLOCKSIZE = 8;
- for ( j = 0; j < lda; j++ )
- {
- //for ( int jTemp = j; jTemp < j + BLOCKSIZE; jTemp++ ) {
- int jFlag = j*lda;
- for ( i = coreid*lda/ncores; i < (coreid+1)*lda/ncores; i+=BLOCKSIZE ) {
- for ( int iTemp = i; iTemp < i + BLOCKSIZE; iTemp++ ) {
-
- int iFlag = iTemp*lda;
- int cLoc = jFlag+iTemp;
- for ( k = 0; k < lda; k+=16) {
- *(C+cLoc) += *(A+jFlag+k) * *(bTrans+iFlag+k);
- *(C+cLoc) += *(A+jFlag+k+1) * *(bTrans+iFlag+k+1);
- *(C+cLoc) += *(A+jFlag+k+2) * *(bTrans+iFlag+k+2);
- *(C+cLoc) += *(A+jFlag+k+3) * *(bTrans+iFlag+k+3);
- *(C+cLoc) += *(A+jFlag+k+4) * *(bTrans+iFlag+k+4);
- *(C+cLoc) += *(A+jFlag+k+5) * *(bTrans+iFlag+k+5);
- *(C+cLoc) += *(A+jFlag+k+6) * *(bTrans+iFlag+k+6);
- *(C+cLoc) += *(A+jFlag+k+7) * *(bTrans+iFlag+k+7);
- *(C+cLoc) += *(A+jFlag+k+8) * *(bTrans+iFlag+k+8);
- *(C+cLoc) += *(A+jFlag+k+9) * *(bTrans+iFlag+k+9);
- *(C+cLoc) += *(A+jFlag+k+10) * *(bTrans+iFlag+k+10);
- *(C+cLoc) += *(A+jFlag+k+11) * *(bTrans+iFlag+k+11);
- *(C+cLoc) += *(A+jFlag+k+12) * *(bTrans+iFlag+k+12);
- *(C+cLoc) += *(A+jFlag+k+13) * *(bTrans+iFlag+k+13);
- *(C+cLoc) += *(A+jFlag+k+14) * *(bTrans+iFlag+k+14);
- *(C+cLoc) += *(A+jFlag+k+15) * *(bTrans+iFlag+k+15);
- }
- }
- }
- //}
- }
-
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student: Anirudh Garg
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
-unsigned long _c = -rdcycle(), _i = -rdinstret(); \
-code; \
-_c += rdcycle(), _i += rdinstret(); \
-if (coreid == 0) \
-printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
-stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
-} while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- {
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
- }
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
- int i, j, k;
-
- /*547287
- for ( i = coreid*lda/ncores; i < (coreid+1)*lda/ncores; i++ )
- {
- for ( j = 0; j < lda; j++ )
- {
- int aIndex = j*lda;
- int cIndex = i + aIndex;
- C[cIndex] += A[aIndex] * B[i];
- C[cIndex] += A[aIndex + 1] * B[1*lda + i];
- C[cIndex] += A[aIndex + 2] * B[2*lda + i];
- C[cIndex] += A[aIndex + 3] * B[3*lda + i];
- C[cIndex] += A[aIndex + 4] * B[4*lda + i];
- C[cIndex] += A[aIndex + 5] * B[5*lda + i];
- C[cIndex] += A[aIndex + 6] * B[6*lda + i];
- C[cIndex] += A[aIndex + 7] * B[7*lda + i];
- C[cIndex] += A[aIndex + 8] * B[8*lda + i];
- C[cIndex] += A[aIndex + 9] * B[9*lda + i];
- C[cIndex] += A[aIndex + 10] * B[10*lda + i];
- C[cIndex] += A[aIndex + 11] * B[11*lda + i];
- C[cIndex] += A[aIndex + 12] * B[12*lda + i];
- C[cIndex] += A[aIndex + 13] * B[13*lda + i];
- C[cIndex] += A[aIndex + 14] * B[14*lda + i];
- C[cIndex] += A[aIndex + 15] * B[15*lda + i];
- C[cIndex] += A[aIndex + 16] * B[16*lda + i];
- C[cIndex] += A[aIndex + 17] * B[17*lda + i];
- C[cIndex] += A[aIndex + 18] * B[18*lda + i];
- C[cIndex] += A[aIndex + 19] * B[19*lda + i];
- C[cIndex] += A[aIndex + 20] * B[20*lda + i];
- C[cIndex] += A[aIndex + 21] * B[21*lda + i];
- C[cIndex] += A[aIndex + 22] * B[22*lda + i];
- C[cIndex] += A[aIndex + 23] * B[23*lda + i];
- C[cIndex] += A[aIndex + 24] * B[24*lda + i];
- C[cIndex] += A[aIndex + 25] * B[25*lda + i];
- C[cIndex] += A[aIndex + 26] * B[26*lda + i];
- C[cIndex] += A[aIndex + 27] * B[27*lda + i];
- C[cIndex] += A[aIndex + 28] * B[28*lda + i];
- C[cIndex] += A[aIndex + 29] * B[29*lda + i];
- C[cIndex] += A[aIndex + 30] * B[30*lda + i];
- C[cIndex] += A[aIndex + 31] * B[31*lda + i];
- }
- }
- */
-
- //492827
- /* for ( i = coreid*lda/ncores; i < (coreid+1)*lda/ncores; i++ )
- {
- for ( j = 0; j < lda; j++ )
- {
-
- int aIndex = j*lda;
- int cIndex = i + aIndex;
- for ( k = 0; k < lda; k++)
- {
- C[cIndex] += A[aIndex + k] * B[k*lda + i];
- /* C[cIndex] += A[aIndex + k+1] * B[(k+1)*lda + i];
- C[cIndex] += A[aIndex + k+2] * B[(k+2)*lda + i];
- C[cIndex] += A[aIndex + k+3] * B[(k+3)*lda + i];
- C[cIndex] += A[aIndex + k+4] * B[(k+4)*lda + i];
- C[cIndex] += A[aIndex + k+5] * B[(k+5)*lda + i];
- C[cIndex] += A[aIndex + k+6] * B[(k+6)*lda + i];
- C[cIndex] += A[aIndex + k+7] * B[(k+7)*lda + i];
- C[cIndex] += A[aIndex + k+8] * B[(k+8)*lda + i];
- C[cIndex] += A[aIndex + k+9] * B[(k+9)*lda + i];
- C[cIndex] += A[aIndex + k+10] * B[(k+10)*lda + i];
- C[cIndex] += A[aIndex + k+11] * B[(k+11)*lda + i];
- C[cIndex] += A[aIndex + k+12] * B[(k+12)*lda + i];
- C[cIndex] += A[aIndex + k+13] * B[(k+13)*lda + i];
- C[cIndex] += A[aIndex + k+14] * B[(k+14)*lda + i];
- C[cIndex] += A[aIndex + k+15] * B[(k+15)*lda + i];*/
- /* }
- }
- }*/
- /*
- //326378
- data_t bTrans[1024];
-
- for (int counti = 0; counti < 32; counti++) {
- for (int countj = 0; countj < 32; countj++) {
- *(bTrans + counti + countj*lda) = *(B + countj + counti*lda);
- }
- }
-
-
- int BLOCKSIZE = 8;
- for ( i = 0; i < lda; i+=BLOCKSIZE )
- {
- for ( int iTemp = i; iTemp < i + BLOCKSIZE; iTemp++ ) {
- int iFlag = iTemp*lda;
- for ( j = coreid*lda/ncores; j < (coreid+1)*lda/ncores; j++ ) {
- int jFlag = j*lda;
- int cLoc = jFlag+iTemp;
- for ( k = 0; k < lda; k+=8) {
- *(C+cLoc) += *(A+jFlag+k) * *(bTrans+iFlag+k);
- *(C+cLoc) += *(A+jFlag+k+1) * *(bTrans+iFlag+k+1);
- *(C+cLoc) += *(A+jFlag+k+2) * *(bTrans+iFlag+k+2);
- *(C+cLoc) += *(A+jFlag+k+3) * *(bTrans+iFlag+k+3);
- *(C+cLoc) += *(A+jFlag+k+4) * *(bTrans+iFlag+k+4);
- *(C+cLoc) += *(A+jFlag+k+5) * *(bTrans+iFlag+k+5);
- *(C+cLoc) += *(A+jFlag+k+6) * *(bTrans+iFlag+k+6);
- *(C+cLoc) += *(A+jFlag+k+7) * *(bTrans+iFlag+k+7);
- }
- }
- }
- }*/
- data_t bTrans[1024];
-
- for (int counti = coreid*32/ncores; counti < (coreid+1)*lda/ncores; counti++) {
- for (int countj = 0; countj < 32; countj++) {
- *(bTrans + counti + countj*lda) = *(B + countj + counti*lda);
- }
- }
-
-
- int BLOCKSIZE = 8;
- for ( j = 0; j < lda; j++ )
- {
- //for ( int jTemp = j; jTemp < j + BLOCKSIZE; jTemp++ ) {
- int jFlag = j*lda;
- for ( i = coreid*lda/ncores; i < (coreid+1)*lda/ncores; i+=BLOCKSIZE ) {
- for ( int iTemp = i; iTemp < i + BLOCKSIZE; iTemp++ ) {
-
- int iFlag = iTemp*lda;
- int cLoc = jFlag+iTemp;
- for ( k = 0; k < lda; k+=16) {
- *(C+cLoc) += *(A+jFlag+k) * *(bTrans+iFlag+k);
- *(C+cLoc) += *(A+jFlag+k+1) * *(bTrans+iFlag+k+1);
- *(C+cLoc) += *(A+jFlag+k+2) * *(bTrans+iFlag+k+2);
- *(C+cLoc) += *(A+jFlag+k+3) * *(bTrans+iFlag+k+3);
- *(C+cLoc) += *(A+jFlag+k+4) * *(bTrans+iFlag+k+4);
- *(C+cLoc) += *(A+jFlag+k+5) * *(bTrans+iFlag+k+5);
- *(C+cLoc) += *(A+jFlag+k+6) * *(bTrans+iFlag+k+6);
- *(C+cLoc) += *(A+jFlag+k+7) * *(bTrans+iFlag+k+7);
- *(C+cLoc) += *(A+jFlag+k+8) * *(bTrans+iFlag+k+8);
- *(C+cLoc) += *(A+jFlag+k+9) * *(bTrans+iFlag+k+9);
- *(C+cLoc) += *(A+jFlag+k+10) * *(bTrans+iFlag+k+10);
- *(C+cLoc) += *(A+jFlag+k+11) * *(bTrans+iFlag+k+11);
- *(C+cLoc) += *(A+jFlag+k+12) * *(bTrans+iFlag+k+12);
- *(C+cLoc) += *(A+jFlag+k+13) * *(bTrans+iFlag+k+13);
- *(C+cLoc) += *(A+jFlag+k+14) * *(bTrans+iFlag+k+14);
- *(C+cLoc) += *(A+jFlag+k+15) * *(bTrans+iFlag+k+15);
- }
- }
- }
- //}
- }
-
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
-
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
-
-
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
-
- size_t i;
-
- // interleave accesses
- for (i = (coreid*n)/ncores; i < ((coreid+1)*n)/ncores; i++)
- {
-
-
- x[i] = x[i] + y[i];
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
- # Print the option summary followed by the text held in $usageMsg, then end the script.
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
- # exit() is called without a status argument, so this sub never returns to its caller.
- exit();
-}
-
-sub processCommandLine()
-{
- # Fill %opts with the defaults, then let GetOptions overwrite them from the command line.
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); # ':i' = integer whose value is optional
- $opts{"help"} and usage();
- # usage() ends the script, so falling through here means parsing succeeded and --help was not given.
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
- # $_[0] is the name used in the emitted declaration; $_[1] is a reference to the array of values.
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
- # The emitted array size is the literal token DATA_SIZE, not $arrayLen.
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
- # Short array (at most $numCols values): everything goes on a single row.
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
- # Longer array: $numRows full rows of $numCols values, then one partial row for any remainder.
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
- # Remainder row; as in the loops above, the separator is skipped after the very last element.
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
- # Build two lists of random integers and their element-wise sums, then print all three as C arrays.
- processCommandLine();
- srand($opts{"seed"});
- # Each value is int(rand(19)), i.e. an integer in 0..18; one pair is generated per element up to --size.
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
- # Output order: the DATA_SIZE define, the two input arrays, then the expected sums.
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+ // Blocked matrix multiply: adds, for each covered row j and column i, the sum over k of A[j*lda + k] * B[k*lda + i] into C[j*lda + i].
+ // Version 2.16: v2.15 optimized by getting rid of TempB. Author's figures: MSI 83K, with test one 81K (units not stated).
+ // NOTE(review): every loop bound is a literal (rows 0..31, 4 blocks of 8 in k and in i), so only a 32x32 problem is covered; lda is used as the row stride only.
+ // Row split: coreid 0 does rows 0..15, coreid 1 does rows 16..31. With ncores == 1 and coreid == 0 both branches run; with ncores != 1 any other coreid does no work.
+ static __thread data_t TempA[8]; // 8 consecutive elements of row j of A (one k-block)
+ static __thread data_t TempB[8]; // declared but never referenced in this function
+ static __thread data_t TempC[8]; // partial sums for 8 consecutive elements of row j of C
+ static __thread int j,m,n; // loop counters, declared static __thread rather than as automatic locals
+ // C is only ever updated with +=; nothing in this function clears it first.
+ if(coreid == 1 || ncores == 1 )
+ {
+ for ( j = 16; j < 32; j++ )
+ {
+ // m selects a block of 8 consecutive k values (columns of A, rows of B).
+ for ( m = 0; m < 4; m++ )
+ {
+ // Copy A[j*lda + 8m .. 8m+7] once so the n loop below reuses it for all four column blocks.
+ TempA[0] = A[j*lda+0+8*m];
+ TempA[1] = A[j*lda+1+8*m];
+ TempA[2] = A[j*lda+2+8*m];
+ TempA[3] = A[j*lda+3+8*m];
+ TempA[4] = A[j*lda+4+8*m];
+ TempA[5] = A[j*lda+5+8*m];
+ TempA[6] = A[j*lda+6+8*m];
+ TempA[7] = A[j*lda+7+8*m];
+
+
+ // n selects a block of 8 consecutive columns of B and C.
+ for( n = 0; n < 4; n++)
+ {
+
+
+
+
+ // Row 8m+0 of B initializes the 8 partial sums with plain '=' (TempC is not cleared separately).
+ TempC[0] = TempA[0] * B[(0+8*m)*lda+0+8*n];
+ TempC[1] = TempA[0] * B[(0+8*m)*lda+1+8*n];
+ TempC[2] = TempA[0] * B[(0+8*m)*lda+2+8*n];
+ TempC[3] = TempA[0] * B[(0+8*m)*lda+3+8*n];
+ TempC[4] = TempA[0] * B[(0+8*m)*lda+4+8*n];
+ TempC[5] = TempA[0] * B[(0+8*m)*lda+5+8*n];
+ TempC[6] = TempA[0] * B[(0+8*m)*lda+6+8*n];
+ TempC[7] = TempA[0] * B[(0+8*m)*lda+7+8*n];
+
+ // Rows 8m+1 .. 8m+7 of B are accumulated into the same partial sums.
+ TempC[0] += TempA[1] * B[(1+8*m)*lda+0+8*n];
+ TempC[1] += TempA[1] * B[(1+8*m)*lda+1+8*n];
+ TempC[2] += TempA[1] * B[(1+8*m)*lda+2+8*n];
+ TempC[3] += TempA[1] * B[(1+8*m)*lda+3+8*n];
+ TempC[4] += TempA[1] * B[(1+8*m)*lda+4+8*n];
+ TempC[5] += TempA[1] * B[(1+8*m)*lda+5+8*n];
+ TempC[6] += TempA[1] * B[(1+8*m)*lda+6+8*n];
+ TempC[7] += TempA[1] * B[(1+8*m)*lda+7+8*n];
+
+
+
+ TempC[0] += TempA[2] * B[(2+8*m)*lda+0+8*n];
+ TempC[1] += TempA[2] * B[(2+8*m)*lda+1+8*n];
+ TempC[2] += TempA[2] * B[(2+8*m)*lda+2+8*n];
+ TempC[3] += TempA[2] * B[(2+8*m)*lda+3+8*n];
+ TempC[4] += TempA[2] * B[(2+8*m)*lda+4+8*n];
+ TempC[5] += TempA[2] * B[(2+8*m)*lda+5+8*n];
+ TempC[6] += TempA[2] * B[(2+8*m)*lda+6+8*n];
+ TempC[7] += TempA[2] * B[(2+8*m)*lda+7+8*n];
+
+
+
+ TempC[0] += TempA[3] * B[(3+8*m)*lda+0+8*n];
+ TempC[1] += TempA[3] * B[(3+8*m)*lda+1+8*n];
+ TempC[2] += TempA[3] * B[(3+8*m)*lda+2+8*n];
+ TempC[3] += TempA[3] * B[(3+8*m)*lda+3+8*n];
+ TempC[4] += TempA[3] * B[(3+8*m)*lda+4+8*n];
+ TempC[5] += TempA[3] * B[(3+8*m)*lda+5+8*n];
+ TempC[6] += TempA[3] * B[(3+8*m)*lda+6+8*n];
+ TempC[7] += TempA[3] * B[(3+8*m)*lda+7+8*n];
+
+ TempC[0] += TempA[4] * B[(4+8*m)*lda+0+8*n];
+ TempC[1] += TempA[4] * B[(4+8*m)*lda+1+8*n];
+ TempC[2] += TempA[4] * B[(4+8*m)*lda+2+8*n];
+ TempC[3] += TempA[4] * B[(4+8*m)*lda+3+8*n];
+ TempC[4] += TempA[4] * B[(4+8*m)*lda+4+8*n];
+ TempC[5] += TempA[4] * B[(4+8*m)*lda+5+8*n];
+ TempC[6] += TempA[4] * B[(4+8*m)*lda+6+8*n];
+ TempC[7] += TempA[4] * B[(4+8*m)*lda+7+8*n];
+
+
+ TempC[0] += TempA[5] * B[(5+8*m)*lda+0+8*n];
+ TempC[1] += TempA[5] * B[(5+8*m)*lda+1+8*n];
+ TempC[2] += TempA[5] * B[(5+8*m)*lda+2+8*n];
+ TempC[3] += TempA[5] * B[(5+8*m)*lda+3+8*n];
+ TempC[4] += TempA[5] * B[(5+8*m)*lda+4+8*n];
+ TempC[5] += TempA[5] * B[(5+8*m)*lda+5+8*n];
+ TempC[6] += TempA[5] * B[(5+8*m)*lda+6+8*n];
+ TempC[7] += TempA[5] * B[(5+8*m)*lda+7+8*n];
+
+
+
+ TempC[0] += TempA[6] * B[(6+8*m)*lda+0+8*n];
+ TempC[1] += TempA[6] * B[(6+8*m)*lda+1+8*n];
+ TempC[2] += TempA[6] * B[(6+8*m)*lda+2+8*n];
+ TempC[3] += TempA[6] * B[(6+8*m)*lda+3+8*n];
+ TempC[4] += TempA[6] * B[(6+8*m)*lda+4+8*n];
+ TempC[5] += TempA[6] * B[(6+8*m)*lda+5+8*n];
+ TempC[6] += TempA[6] * B[(6+8*m)*lda+6+8*n];
+ TempC[7] += TempA[6] * B[(6+8*m)*lda+7+8*n];
+
+
+ TempC[0] += TempA[7] * B[(7+8*m)*lda+0+8*n];
+ TempC[1] += TempA[7] * B[(7+8*m)*lda+1+8*n];
+ TempC[2] += TempA[7] * B[(7+8*m)*lda+2+8*n];
+ TempC[3] += TempA[7] * B[(7+8*m)*lda+3+8*n];
+ TempC[4] += TempA[7] * B[(7+8*m)*lda+4+8*n];
+ TempC[5] += TempA[7] * B[(7+8*m)*lda+5+8*n];
+ TempC[6] += TempA[7] * B[(7+8*m)*lda+6+8*n];
+ TempC[7] += TempA[7] * B[(7+8*m)*lda+7+8*n];
+
+
+ // Fold this k-block's partial sums into row j of C.
+ C[0+8*n+j*lda] += TempC[0];
+ C[1+8*n+j*lda] += TempC[1];
+ C[2+8*n+j*lda] += TempC[2];
+ C[3+8*n+j*lda] += TempC[3];
+ C[4+8*n+j*lda] += TempC[4];
+ C[5+8*n+j*lda] += TempC[5];
+ C[6+8*n+j*lda] += TempC[6];
+ C[7+8*n+j*lda] += TempC[7];
+ }
+ }
+ }
+ }
+ if(coreid == 0)
+ {
+ for ( j = 0; j < 16; j++ )
+ {
+ // Same loop nest as the first branch, applied to rows 0..15.
+ for ( m = 0; m < 4; m++ )
+ {
+
+ TempA[0] = A[j*lda+0+8*m];
+ TempA[1] = A[j*lda+1+8*m];
+ TempA[2] = A[j*lda+2+8*m];
+ TempA[3] = A[j*lda+3+8*m];
+ TempA[4] = A[j*lda+4+8*m];
+ TempA[5] = A[j*lda+5+8*m];
+ TempA[6] = A[j*lda+6+8*m];
+ TempA[7] = A[j*lda+7+8*m];
+
+
+
+ for( n = 0; n < 4; n++)
+ {
+
+
+
+
+
+ TempC[0] = TempA[0] * B[(0+8*m)*lda+0+8*n];
+ TempC[1] = TempA[0] * B[(0+8*m)*lda+1+8*n];
+ TempC[2] = TempA[0] * B[(0+8*m)*lda+2+8*n];
+ TempC[3] = TempA[0] * B[(0+8*m)*lda+3+8*n];
+ TempC[4] = TempA[0] * B[(0+8*m)*lda+4+8*n];
+ TempC[5] = TempA[0] * B[(0+8*m)*lda+5+8*n];
+ TempC[6] = TempA[0] * B[(0+8*m)*lda+6+8*n];
+ TempC[7] = TempA[0] * B[(0+8*m)*lda+7+8*n];
+
+
+ TempC[0] += TempA[1] * B[(1+8*m)*lda+0+8*n];
+ TempC[1] += TempA[1] * B[(1+8*m)*lda+1+8*n];
+ TempC[2] += TempA[1] * B[(1+8*m)*lda+2+8*n];
+ TempC[3] += TempA[1] * B[(1+8*m)*lda+3+8*n];
+ TempC[4] += TempA[1] * B[(1+8*m)*lda+4+8*n];
+ TempC[5] += TempA[1] * B[(1+8*m)*lda+5+8*n];
+ TempC[6] += TempA[1] * B[(1+8*m)*lda+6+8*n];
+ TempC[7] += TempA[1] * B[(1+8*m)*lda+7+8*n];
+
+
+
+ TempC[0] += TempA[2] * B[(2+8*m)*lda+0+8*n];
+ TempC[1] += TempA[2] * B[(2+8*m)*lda+1+8*n];
+ TempC[2] += TempA[2] * B[(2+8*m)*lda+2+8*n];
+ TempC[3] += TempA[2] * B[(2+8*m)*lda+3+8*n];
+ TempC[4] += TempA[2] * B[(2+8*m)*lda+4+8*n];
+ TempC[5] += TempA[2] * B[(2+8*m)*lda+5+8*n];
+ TempC[6] += TempA[2] * B[(2+8*m)*lda+6+8*n];
+ TempC[7] += TempA[2] * B[(2+8*m)*lda+7+8*n];
+
+
+
+ TempC[0] += TempA[3] * B[(3+8*m)*lda+0+8*n];
+ TempC[1] += TempA[3] * B[(3+8*m)*lda+1+8*n];
+ TempC[2] += TempA[3] * B[(3+8*m)*lda+2+8*n];
+ TempC[3] += TempA[3] * B[(3+8*m)*lda+3+8*n];
+ TempC[4] += TempA[3] * B[(3+8*m)*lda+4+8*n];
+ TempC[5] += TempA[3] * B[(3+8*m)*lda+5+8*n];
+ TempC[6] += TempA[3] * B[(3+8*m)*lda+6+8*n];
+ TempC[7] += TempA[3] * B[(3+8*m)*lda+7+8*n];
+
+ TempC[0] += TempA[4] * B[(4+8*m)*lda+0+8*n];
+ TempC[1] += TempA[4] * B[(4+8*m)*lda+1+8*n];
+ TempC[2] += TempA[4] * B[(4+8*m)*lda+2+8*n];
+ TempC[3] += TempA[4] * B[(4+8*m)*lda+3+8*n];
+ TempC[4] += TempA[4] * B[(4+8*m)*lda+4+8*n];
+ TempC[5] += TempA[4] * B[(4+8*m)*lda+5+8*n];
+ TempC[6] += TempA[4] * B[(4+8*m)*lda+6+8*n];
+ TempC[7] += TempA[4] * B[(4+8*m)*lda+7+8*n];
+
+
+ TempC[0] += TempA[5] * B[(5+8*m)*lda+0+8*n];
+ TempC[1] += TempA[5] * B[(5+8*m)*lda+1+8*n];
+ TempC[2] += TempA[5] * B[(5+8*m)*lda+2+8*n];
+ TempC[3] += TempA[5] * B[(5+8*m)*lda+3+8*n];
+ TempC[4] += TempA[5] * B[(5+8*m)*lda+4+8*n];
+ TempC[5] += TempA[5] * B[(5+8*m)*lda+5+8*n];
+ TempC[6] += TempA[5] * B[(5+8*m)*lda+6+8*n];
+ TempC[7] += TempA[5] * B[(5+8*m)*lda+7+8*n];
+
+
+
+ TempC[0] += TempA[6] * B[(6+8*m)*lda+0+8*n];
+ TempC[1] += TempA[6] * B[(6+8*m)*lda+1+8*n];
+ TempC[2] += TempA[6] * B[(6+8*m)*lda+2+8*n];
+ TempC[3] += TempA[6] * B[(6+8*m)*lda+3+8*n];
+ TempC[4] += TempA[6] * B[(6+8*m)*lda+4+8*n];
+ TempC[5] += TempA[6] * B[(6+8*m)*lda+5+8*n];
+ TempC[6] += TempA[6] * B[(6+8*m)*lda+6+8*n];
+ TempC[7] += TempA[6] * B[(6+8*m)*lda+7+8*n];
+
+
+ TempC[0] += TempA[7] * B[(7+8*m)*lda+0+8*n];
+ TempC[1] += TempA[7] * B[(7+8*m)*lda+1+8*n];
+ TempC[2] += TempA[7] * B[(7+8*m)*lda+2+8*n];
+ TempC[3] += TempA[7] * B[(7+8*m)*lda+3+8*n];
+ TempC[4] += TempA[7] * B[(7+8*m)*lda+4+8*n];
+ TempC[5] += TempA[7] * B[(7+8*m)*lda+5+8*n];
+ TempC[6] += TempA[7] * B[(7+8*m)*lda+6+8*n];
+ TempC[7] += TempA[7] * B[(7+8*m)*lda+7+8*n];
+
+
+
+ C[0+8*n+j*lda] += TempC[0];
+ C[1+8*n+j*lda] += TempC[1];
+ C[2+8*n+j*lda] += TempC[2];
+ C[3+8*n+j*lda] += TempC[3];
+ C[4+8*n+j*lda] += TempC[4];
+ C[5+8*n+j*lda] += TempC[5];
+ C[6+8*n+j*lda] += TempC[6];
+ C[7+8*n+j*lda] += TempC[7];
+ }
+ }
+ }
+ }
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-// stats(code): runs `code` once between two rdcycle()/rdinstret() reads; the thread with coreid 0 then prints the stringified code, elapsed cycles, cycles divided by DIM_SIZE^3, and cycles per instruction (one decimal digit each).
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-// Prints name followed by the n elements of arr on one line; every thread whose coreid is not 0 returns without printing.
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
- // Each element is cast to long before printing, so only its integer part is shown.
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-// Compares test[0..n-1] with correct[0..n-1] on the thread whose coreid is 0; on the first mismatch it prints both values and calls exit(-1). Other threads return immediately.
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
- // Elements are compared with exact !=, with no tolerance.
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
- // NOTE(review): the format string above uses %d for i, which is a size_t; %zu would match the argument type.
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// Single-threaded naive version: C[i + j*lda] += A[j*lda + k] * B[k*lda + i] for all i, j, k in [0, lda).
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
- // Only the thread with coreid 0 computes; any thread with coreid > 0 returns without touching C.
- if (coreid > 0)
- return;
- // C is accumulated into with += and is not cleared in this function.
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
- //-------------------------------------------------------------first working version best 500k
- /*
- static __thread int i, j, k;
- if(coreid == 0)
- {
- for ( j = 0; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- for ( i = 0; i < lda; i++)
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
- }
- }
-
- if(coreid ==1)
- {
- for ( j = 1; j < lda; j+=2 )
- {
- for ( k = 0;k < lda; k++)
- {
- for ( i = 0; i < lda; i++)
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
-
- }
- }
- }
- }
- */
- //-------------------------------------------------------------version1.1, take read out of inner loop,300k
- /*
- static __thread int i, j, k;
- static __thread data_t TempA;
-
- if(coreid == 0)
- {
- for ( j = 0; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[j*lda + k];
- for ( i = 0; i < lda; i++)
- {
- C[i + j*lda] += TempA* B[k*lda + i];
- }
- }
- }
- }
-
- if(coreid ==1)
- {
- for ( j = 1; j < lda; j+=2 )
- {
- for ( k = 0;k < lda; k++)
- {
- TempA = A[j*lda + k];
- for ( i = 0; i < lda; i++)
- {
- C[i + j*lda] += TempA* B[k*lda + i];
- }
- }
- }
- }
- */
- //-------------------------------------------------------------version2.0, read 8 elements in B at one time. 140k mi, MSI117.0k
- /*
- static __thread int i, j, k, m, n;
- static __thread data_t TempA;
- static __thread data_t TempB[8];
-
- if(coreid == 0)
- {
- for ( j = 0; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[j*lda + k];
- for( n = 0; n < 4; n++)
- {
-
- TempB[0] = B[k*lda+0+8*n];
- TempB[1] = B[k*lda+1+8*n];
- TempB[2] = B[k*lda+2+8*n];
- TempB[3] = B[k*lda+3+8*n];
- TempB[4] = B[k*lda+4+8*n];
- TempB[5] = B[k*lda+5+8*n];
- TempB[6] = B[k*lda+6+8*n];
- TempB[7] = B[k*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA * TempB[0];
- C[1+8*n+j*lda] += TempA * TempB[1];
- C[2+8*n+j*lda] += TempA * TempB[2];
- C[3+8*n+j*lda] += TempA * TempB[3];
- C[4+8*n+j*lda] += TempA * TempB[4];
- C[5+8*n+j*lda] += TempA * TempB[5];
- C[6+8*n+j*lda] += TempA * TempB[6];
- C[7+8*n+j*lda] += TempA * TempB[7];
-
- }
-
- }
- }
- }
-
- if(coreid == 1)
- {
- for ( j = 1; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[j*lda + k];
- for( n = 0; n < 4; n++)
- {
-
- TempB[0] = B[k*lda+0+8*n];
- TempB[1] = B[k*lda+1+8*n];
- TempB[2] = B[k*lda+2+8*n];
- TempB[3] = B[k*lda+3+8*n];
- TempB[4] = B[k*lda+4+8*n];
- TempB[5] = B[k*lda+5+8*n];
- TempB[6] = B[k*lda+6+8*n];
- TempB[7] = B[k*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA * TempB[0];
- C[1+8*n+j*lda] += TempA * TempB[1];
- C[2+8*n+j*lda] += TempA * TempB[2];
- C[3+8*n+j*lda] += TempA * TempB[3];
- C[4+8*n+j*lda] += TempA * TempB[4];
- C[5+8*n+j*lda] += TempA * TempB[5];
- C[6+8*n+j*lda] += TempA * TempB[6];
- C[7+8*n+j*lda] += TempA * TempB[7];
-
- }
-
- }
- }
- }
- */
-
- //-------------------------------------------------------------version2.1, optimize k. 700k. bad move to v2.2.
- //-------------------------------------------------------------version2.9 take off all inner loops for both cores, MSI,109K. MI 182k
- //-------------------------------------------------------------version2.10 use i= j*lda inside the n loop increase speed. but not out m and n. tried replace first 3, get 104.9k
- /*
- static __thread int j, m, i,n;
- static __thread data_t TempA[8];
- static __thread data_t TempB[8];
-
- if(coreid == 1)
- {
- for ( j = 1; j < lda; j+=2 )
- {
-
- for ( m = 0; m < 4; m++ )
- {
-
- TempA[0] = A[j*lda+0+8*m];
- TempA[1] = A[j*lda+1+8*m];
- TempA[2] = A[j*lda+2+8*m];
- TempA[3] = A[j*lda+3+8*m];
- TempA[4] = A[j*lda+4+8*m];
- TempA[5] = A[j*lda+5+8*m];
- TempA[6] = A[j*lda+6+8*m];
- TempA[7] = A[j*lda+7+8*m];
-
- for( n = 0; n < 4; n++)
- {
- i = j*lda;
-
- TempB[0] = B[(0+8*m)*lda+0+8*n];
- TempB[1] = B[(0+8*m)*lda+1+8*n];
- TempB[2] = B[(0+8*m)*lda+2+8*n];
- TempB[3] = B[(0+8*m)*lda+3+8*n];
- TempB[4] = B[(0+8*m)*lda+4+8*n];
- TempB[5] = B[(0+8*m)*lda+5+8*n];
- TempB[6] = B[(0+8*m)*lda+6+8*n];
- TempB[7] = B[(0+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[0] * TempB[0];
- C[1+8*n+i] += TempA[0] * TempB[1];
- C[2+8*n+i] += TempA[0] * TempB[2];
- C[3+8*n+i] += TempA[0] * TempB[3];
- C[4+8*n+i] += TempA[0] * TempB[4];
- C[5+8*n+i] += TempA[0] * TempB[5];
- C[6+8*n+i] += TempA[0] * TempB[6];
- C[7+8*n+i] += TempA[0] * TempB[7];
-
-
-
- TempB[0] = B[(1+8*m)*lda+0+8*n];
- TempB[1] = B[(1+8*m)*lda+1+8*n];
- TempB[2] = B[(1+8*m)*lda+2+8*n];
- TempB[3] = B[(1+8*m)*lda+3+8*n];
- TempB[4] = B[(1+8*m)*lda+4+8*n];
- TempB[5] = B[(1+8*m)*lda+5+8*n];
- TempB[6] = B[(1+8*m)*lda+6+8*n];
- TempB[7] = B[(1+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[1] * TempB[0];
- C[1+8*n+i] += TempA[1] * TempB[1];
- C[2+8*n+i] += TempA[1] * TempB[2];
- C[3+8*n+i] += TempA[1] * TempB[3];
- C[4+8*n+i] += TempA[1] * TempB[4];
- C[5+8*n+i] += TempA[1] * TempB[5];
- C[6+8*n+i] += TempA[1] * TempB[6];
- C[7+8*n+i] += TempA[1] * TempB[7];
-
-
-
- TempB[0] = B[(2+8*m)*lda+0+8*n];
- TempB[1] = B[(2+8*m)*lda+1+8*n];
- TempB[2] = B[(2+8*m)*lda+2+8*n];
- TempB[3] = B[(2+8*m)*lda+3+8*n];
- TempB[4] = B[(2+8*m)*lda+4+8*n];
- TempB[5] = B[(2+8*m)*lda+5+8*n];
- TempB[6] = B[(2+8*m)*lda+6+8*n];
- TempB[7] = B[(2+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[2] * TempB[0];
- C[1+8*n+i] += TempA[2] * TempB[1];
- C[2+8*n+i] += TempA[2] * TempB[2];
- C[3+8*n+i] += TempA[2] * TempB[3];
- C[4+8*n+i] += TempA[2] * TempB[4];
- C[5+8*n+i] += TempA[2] * TempB[5];
- C[6+8*n+i] += TempA[2] * TempB[6];
- C[7+8*n+i] += TempA[2] * TempB[7];
-
-
-
- TempB[0] = B[(3+8*m)*lda+0+8*n];
- TempB[1] = B[(3+8*m)*lda+1+8*n];
- TempB[2] = B[(3+8*m)*lda+2+8*n];
- TempB[3] = B[(3+8*m)*lda+3+8*n];
- TempB[4] = B[(3+8*m)*lda+4+8*n];
- TempB[5] = B[(3+8*m)*lda+5+8*n];
- TempB[6] = B[(3+8*m)*lda+6+8*n];
- TempB[7] = B[(3+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[3] * TempB[0];
- C[1+8*n+i] += TempA[3] * TempB[1];
- C[2+8*n+i] += TempA[3] * TempB[2];
- C[3+8*n+i] += TempA[3] * TempB[3];
- C[4+8*n+i] += TempA[3] * TempB[4];
- C[5+8*n+i] += TempA[3] * TempB[5];
- C[6+8*n+i] += TempA[3] * TempB[6];
- C[7+8*n+i] += TempA[3] * TempB[7];
-
-
- TempB[0] = B[(4+8*m)*lda+0+8*n];
- TempB[1] = B[(4+8*m)*lda+1+8*n];
- TempB[2] = B[(4+8*m)*lda+2+8*n];
- TempB[3] = B[(4+8*m)*lda+3+8*n];
- TempB[4] = B[(4+8*m)*lda+4+8*n];
- TempB[5] = B[(4+8*m)*lda+5+8*n];
- TempB[6] = B[(4+8*m)*lda+6+8*n];
- TempB[7] = B[(4+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[4] * TempB[0];
- C[1+8*n+i] += TempA[4] * TempB[1];
- C[2+8*n+i] += TempA[4] * TempB[2];
- C[3+8*n+i] += TempA[4] * TempB[3];
- C[4+8*n+i] += TempA[4] * TempB[4];
- C[5+8*n+i] += TempA[4] * TempB[5];
- C[6+8*n+i] += TempA[4] * TempB[6];
- C[7+8*n+i] += TempA[4] * TempB[7];
-
-
-
- TempB[0] = B[(5+8*m)*lda+0+8*n];
- TempB[1] = B[(5+8*m)*lda+1+8*n];
- TempB[2] = B[(5+8*m)*lda+2+8*n];
- TempB[3] = B[(5+8*m)*lda+3+8*n];
- TempB[4] = B[(5+8*m)*lda+4+8*n];
- TempB[5] = B[(5+8*m)*lda+5+8*n];
- TempB[6] = B[(5+8*m)*lda+6+8*n];
- TempB[7] = B[(5+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[5] * TempB[0];
- C[1+8*n+i] += TempA[5] * TempB[1];
- C[2+8*n+i] += TempA[5] * TempB[2];
- C[3+8*n+i] += TempA[5] * TempB[3];
- C[4+8*n+i] += TempA[5] * TempB[4];
- C[5+8*n+i] += TempA[5] * TempB[5];
- C[6+8*n+i] += TempA[5] * TempB[6];
- C[7+8*n+i] += TempA[5] * TempB[7];
-
-
-
- TempB[0] = B[(6+8*m)*lda+0+8*n];
- TempB[1] = B[(6+8*m)*lda+1+8*n];
- TempB[2] = B[(6+8*m)*lda+2+8*n];
- TempB[3] = B[(6+8*m)*lda+3+8*n];
- TempB[4] = B[(6+8*m)*lda+4+8*n];
- TempB[5] = B[(6+8*m)*lda+5+8*n];
- TempB[6] = B[(6+8*m)*lda+6+8*n];
- TempB[7] = B[(6+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[6] * TempB[0];
- C[1+8*n+i] += TempA[6] * TempB[1];
- C[2+8*n+i] += TempA[6] * TempB[2];
- C[3+8*n+i] += TempA[6] * TempB[3];
- C[4+8*n+i] += TempA[6] * TempB[4];
- C[5+8*n+i] += TempA[6] * TempB[5];
- C[6+8*n+i] += TempA[6] * TempB[6];
- C[7+8*n+i] += TempA[6] * TempB[7];
-
-
- TempB[0] = B[(7+8*m)*lda+0+8*n];
- TempB[1] = B[(7+8*m)*lda+1+8*n];
- TempB[2] = B[(7+8*m)*lda+2+8*n];
- TempB[3] = B[(7+8*m)*lda+3+8*n];
- TempB[4] = B[(7+8*m)*lda+4+8*n];
- TempB[5] = B[(7+8*m)*lda+5+8*n];
- TempB[6] = B[(7+8*m)*lda+6+8*n];
- TempB[7] = B[(7+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[7] * TempB[0];
- C[1+8*n+i] += TempA[7] * TempB[1];
- C[2+8*n+i] += TempA[7] * TempB[2];
- C[3+8*n+i] += TempA[7] * TempB[3];
- C[4+8*n+i] += TempA[7] * TempB[4];
- C[5+8*n+i] += TempA[7] * TempB[5];
- C[6+8*n+i] += TempA[7] * TempB[6];
- C[7+8*n+i] += TempA[7] * TempB[7];
- }
-
- }
- }
- }
- if(coreid == 0)
- {
- for ( j = 0; j < lda; j+=2 )
- {
-
- for ( m = 0; m < 4; m++ )
- {
-
- TempA[0] = A[j*lda+0+8*m];
- TempA[1] = A[j*lda+1+8*m];
- TempA[2] = A[j*lda+2+8*m];
- TempA[3] = A[j*lda+3+8*m];
- TempA[4] = A[j*lda+4+8*m];
- TempA[5] = A[j*lda+5+8*m];
- TempA[6] = A[j*lda+6+8*m];
- TempA[7] = A[j*lda+7+8*m];
-
- for( n = 0; n < 4; n++)
- {
- i = j*lda;
-
- TempB[0] = B[(0+8*m)*lda+0+8*n];
- TempB[1] = B[(0+8*m)*lda+1+8*n];
- TempB[2] = B[(0+8*m)*lda+2+8*n];
- TempB[3] = B[(0+8*m)*lda+3+8*n];
- TempB[4] = B[(0+8*m)*lda+4+8*n];
- TempB[5] = B[(0+8*m)*lda+5+8*n];
- TempB[6] = B[(0+8*m)*lda+6+8*n];
- TempB[7] = B[(0+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[0] * TempB[0];
- C[1+8*n+i] += TempA[0] * TempB[1];
- C[2+8*n+i] += TempA[0] * TempB[2];
- C[3+8*n+i] += TempA[0] * TempB[3];
- C[4+8*n+i] += TempA[0] * TempB[4];
- C[5+8*n+i] += TempA[0] * TempB[5];
- C[6+8*n+i] += TempA[0] * TempB[6];
- C[7+8*n+i] += TempA[0] * TempB[7];
-
-
-
- TempB[0] = B[(1+8*m)*lda+0+8*n];
- TempB[1] = B[(1+8*m)*lda+1+8*n];
- TempB[2] = B[(1+8*m)*lda+2+8*n];
- TempB[3] = B[(1+8*m)*lda+3+8*n];
- TempB[4] = B[(1+8*m)*lda+4+8*n];
- TempB[5] = B[(1+8*m)*lda+5+8*n];
- TempB[6] = B[(1+8*m)*lda+6+8*n];
- TempB[7] = B[(1+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[1] * TempB[0];
- C[1+8*n+i] += TempA[1] * TempB[1];
- C[2+8*n+i] += TempA[1] * TempB[2];
- C[3+8*n+i] += TempA[1] * TempB[3];
- C[4+8*n+i] += TempA[1] * TempB[4];
- C[5+8*n+i] += TempA[1] * TempB[5];
- C[6+8*n+i] += TempA[1] * TempB[6];
- C[7+8*n+i] += TempA[1] * TempB[7];
-
-
-
- TempB[0] = B[(2+8*m)*lda+0+8*n];
- TempB[1] = B[(2+8*m)*lda+1+8*n];
- TempB[2] = B[(2+8*m)*lda+2+8*n];
- TempB[3] = B[(2+8*m)*lda+3+8*n];
- TempB[4] = B[(2+8*m)*lda+4+8*n];
- TempB[5] = B[(2+8*m)*lda+5+8*n];
- TempB[6] = B[(2+8*m)*lda+6+8*n];
- TempB[7] = B[(2+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[2] * TempB[0];
- C[1+8*n+i] += TempA[2] * TempB[1];
- C[2+8*n+i] += TempA[2] * TempB[2];
- C[3+8*n+i] += TempA[2] * TempB[3];
- C[4+8*n+i] += TempA[2] * TempB[4];
- C[5+8*n+i] += TempA[2] * TempB[5];
- C[6+8*n+i] += TempA[2] * TempB[6];
- C[7+8*n+i] += TempA[2] * TempB[7];
-
-
-
- TempB[0] = B[(3+8*m)*lda+0+8*n];
- TempB[1] = B[(3+8*m)*lda+1+8*n];
- TempB[2] = B[(3+8*m)*lda+2+8*n];
- TempB[3] = B[(3+8*m)*lda+3+8*n];
- TempB[4] = B[(3+8*m)*lda+4+8*n];
- TempB[5] = B[(3+8*m)*lda+5+8*n];
- TempB[6] = B[(3+8*m)*lda+6+8*n];
- TempB[7] = B[(3+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[3] * TempB[0];
- C[1+8*n+i] += TempA[3] * TempB[1];
- C[2+8*n+i] += TempA[3] * TempB[2];
- C[3+8*n+i] += TempA[3] * TempB[3];
- C[4+8*n+i] += TempA[3] * TempB[4];
- C[5+8*n+i] += TempA[3] * TempB[5];
- C[6+8*n+i] += TempA[3] * TempB[6];
- C[7+8*n+i] += TempA[3] * TempB[7];
-
-
- TempB[0] = B[(4+8*m)*lda+0+8*n];
- TempB[1] = B[(4+8*m)*lda+1+8*n];
- TempB[2] = B[(4+8*m)*lda+2+8*n];
- TempB[3] = B[(4+8*m)*lda+3+8*n];
- TempB[4] = B[(4+8*m)*lda+4+8*n];
- TempB[5] = B[(4+8*m)*lda+5+8*n];
- TempB[6] = B[(4+8*m)*lda+6+8*n];
- TempB[7] = B[(4+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[4] * TempB[0];
- C[1+8*n+i] += TempA[4] * TempB[1];
- C[2+8*n+i] += TempA[4] * TempB[2];
- C[3+8*n+i] += TempA[4] * TempB[3];
- C[4+8*n+i] += TempA[4] * TempB[4];
- C[5+8*n+i] += TempA[4] * TempB[5];
- C[6+8*n+i] += TempA[4] * TempB[6];
- C[7+8*n+i] += TempA[4] * TempB[7];
-
-
-
- TempB[0] = B[(5+8*m)*lda+0+8*n];
- TempB[1] = B[(5+8*m)*lda+1+8*n];
- TempB[2] = B[(5+8*m)*lda+2+8*n];
- TempB[3] = B[(5+8*m)*lda+3+8*n];
- TempB[4] = B[(5+8*m)*lda+4+8*n];
- TempB[5] = B[(5+8*m)*lda+5+8*n];
- TempB[6] = B[(5+8*m)*lda+6+8*n];
- TempB[7] = B[(5+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[5] * TempB[0];
- C[1+8*n+i] += TempA[5] * TempB[1];
- C[2+8*n+i] += TempA[5] * TempB[2];
- C[3+8*n+i] += TempA[5] * TempB[3];
- C[4+8*n+i] += TempA[5] * TempB[4];
- C[5+8*n+i] += TempA[5] * TempB[5];
- C[6+8*n+i] += TempA[5] * TempB[6];
- C[7+8*n+i] += TempA[5] * TempB[7];
-
-
-
- TempB[0] = B[(6+8*m)*lda+0+8*n];
- TempB[1] = B[(6+8*m)*lda+1+8*n];
- TempB[2] = B[(6+8*m)*lda+2+8*n];
- TempB[3] = B[(6+8*m)*lda+3+8*n];
- TempB[4] = B[(6+8*m)*lda+4+8*n];
- TempB[5] = B[(6+8*m)*lda+5+8*n];
- TempB[6] = B[(6+8*m)*lda+6+8*n];
- TempB[7] = B[(6+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[6] * TempB[0];
- C[1+8*n+i] += TempA[6] * TempB[1];
- C[2+8*n+i] += TempA[6] * TempB[2];
- C[3+8*n+i] += TempA[6] * TempB[3];
- C[4+8*n+i] += TempA[6] * TempB[4];
- C[5+8*n+i] += TempA[6] * TempB[5];
- C[6+8*n+i] += TempA[6] * TempB[6];
- C[7+8*n+i] += TempA[6] * TempB[7];
-
-
- TempB[0] = B[(7+8*m)*lda+0+8*n];
- TempB[1] = B[(7+8*m)*lda+1+8*n];
- TempB[2] = B[(7+8*m)*lda+2+8*n];
- TempB[3] = B[(7+8*m)*lda+3+8*n];
- TempB[4] = B[(7+8*m)*lda+4+8*n];
- TempB[5] = B[(7+8*m)*lda+5+8*n];
- TempB[6] = B[(7+8*m)*lda+6+8*n];
- TempB[7] = B[(7+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[7] * TempB[0];
- C[1+8*n+i] += TempA[7] * TempB[1];
- C[2+8*n+i] += TempA[7] * TempB[2];
- C[3+8*n+i] += TempA[7] * TempB[3];
- C[4+8*n+i] += TempA[7] * TempB[4];
- C[5+8*n+i] += TempA[7] * TempB[5];
- C[6+8*n+i] += TempA[7] * TempB[6];
- C[7+8*n+i] += TempA[7] * TempB[7];
- }
-
- }
- }
- }
-
- */
- //-------------------------------------------------------------version2.2, optimize k. from 4 instead of 8 like v2.1, random failing on MI, unknown reason, MSI,350K, take off each inner loop for core 0 260k, both cores 134k
- //-------------------------------------------------------------try false sharing for core 0, 136k.
- /*
- static __thread int j, m, n;
- static __thread data_t TempA[4];
- static __thread data_t TempB[4];
-
- if(coreid == 1)
- {
- for ( j = 1; j < lda; j+=2 )
- {
- for ( m = 0; m < 8; m++ )
- {
- TempA[0] = A[j*lda+0+4*m];
- TempA[1] = A[j*lda+1+4*m];
- TempA[2] = A[j*lda+2+4*m];
- TempA[3] = A[j*lda+3+4*m];
-
- for( n = 0; n < 8; n++)
- {
-
- TempB[0] = B[(0+4*m)*lda+0+4*n];
- TempB[1] = B[(0+4*m)*lda+1+4*n];
- TempB[2] = B[(0+4*m)*lda+2+4*n];
- TempB[3] = B[(0+4*m)*lda+3+4*n];
-
-
- C[0+4*n+j*lda] += TempA[0] * TempB[0];
- C[1+4*n+j*lda] += TempA[0] * TempB[1];
- C[2+4*n+j*lda] += TempA[0] * TempB[2];
- C[3+4*n+j*lda] += TempA[0] * TempB[3];
-
-
-
-
-
- TempB[0] = B[(1+4*m)*lda+0+4*n];
- TempB[1] = B[(1+4*m)*lda+1+4*n];
- TempB[2] = B[(1+4*m)*lda+2+4*n];
- TempB[3] = B[(1+4*m)*lda+3+4*n];
-
-
- C[0+4*n+j*lda] += TempA[1] * TempB[0];
- C[1+4*n+j*lda] += TempA[1] * TempB[1];
- C[2+4*n+j*lda] += TempA[1] * TempB[2];
- C[3+4*n+j*lda] += TempA[1] * TempB[3];
-
-
-
- TempB[0] = B[(2+4*m)*lda+0+4*n];
- TempB[1] = B[(2+4*m)*lda+1+4*n];
- TempB[2] = B[(2+4*m)*lda+2+4*n];
- TempB[3] = B[(2+4*m)*lda+3+4*n];
-
-
- C[0+4*n+j*lda] += TempA[2] * TempB[0];
- C[1+4*n+j*lda] += TempA[2] * TempB[1];
- C[2+4*n+j*lda] += TempA[2] * TempB[2];
- C[3+4*n+j*lda] += TempA[2] * TempB[3];
-
-
-
-
- TempB[0] = B[(3+4*m)*lda+0+4*n];
- TempB[1] = B[(3+4*m)*lda+1+4*n];
- TempB[2] = B[(3+4*m)*lda+2+4*n];
- TempB[3] = B[(3+4*m)*lda+3+4*n];
-
-
- C[0+4*n+j*lda] += TempA[3] * TempB[0];
- C[1+4*n+j*lda] += TempA[3] * TempB[1];
- C[2+4*n+j*lda] += TempA[3] * TempB[2];
- C[3+4*n+j*lda] += TempA[3] * TempB[3];
-
-
- }
- }
- }
- }
- if(coreid == 0)
- {
- for ( j = 0; j < lda; j+=2 )
- {
- for ( m = 0; m < 8; m++ )
- {
- TempA[0] = A[j*lda+0+4*m];
- TempA[1] = A[j*lda+1+4*m];
- TempA[2] = A[j*lda+2+4*m];
- TempA[3] = A[j*lda+3+4*m];
-
- for( n = 0; n < 8; n++)
- {
-
-
-
-
-
-
-
- TempB[0] = B[(1+4*m)*lda+0+4*n];
- TempB[1] = B[(1+4*m)*lda+1+4*n];
- TempB[2] = B[(1+4*m)*lda+2+4*n];
- TempB[3] = B[(1+4*m)*lda+3+4*n];
-
-
- C[0+4*n+j*lda] += TempA[1] * TempB[0];
- C[1+4*n+j*lda] += TempA[1] * TempB[1];
- C[2+4*n+j*lda] += TempA[1] * TempB[2];
- C[3+4*n+j*lda] += TempA[1] * TempB[3];
-
-
-
- TempB[0] = B[(2+4*m)*lda+0+4*n];
- TempB[1] = B[(2+4*m)*lda+1+4*n];
- TempB[2] = B[(2+4*m)*lda+2+4*n];
- TempB[3] = B[(2+4*m)*lda+3+4*n];
-
-
- C[0+4*n+j*lda] += TempA[2] * TempB[0];
- C[1+4*n+j*lda] += TempA[2] * TempB[1];
- C[2+4*n+j*lda] += TempA[2] * TempB[2];
- C[3+4*n+j*lda] += TempA[2] * TempB[3];
-
-
-
-
- TempB[0] = B[(3+4*m)*lda+0+4*n];
- TempB[1] = B[(3+4*m)*lda+1+4*n];
- TempB[2] = B[(3+4*m)*lda+2+4*n];
- TempB[3] = B[(3+4*m)*lda+3+4*n];
-
-
- C[0+4*n+j*lda] += TempA[3] * TempB[0];
- C[1+4*n+j*lda] += TempA[3] * TempB[1];
- C[2+4*n+j*lda] += TempA[3] * TempB[2];
- C[3+4*n+j*lda] += TempA[3] * TempB[3];
-
- TempB[0] = B[(0+4*m)*lda+0+4*n];
- TempB[1] = B[(0+4*m)*lda+1+4*n];
- TempB[2] = B[(0+4*m)*lda+2+4*n];
- TempB[3] = B[(0+4*m)*lda+3+4*n];
-
-
- C[0+4*n+j*lda] += TempA[0] * TempB[0];
- C[1+4*n+j*lda] += TempA[0] * TempB[1];
- C[2+4*n+j*lda] += TempA[0] * TempB[2];
- C[3+4*n+j*lda] += TempA[0] * TempB[3];
-
-
- }
- }
- }
- }
- */
-
-
-
- //-------------------------------------------------------------version2.3, read 8 elements in B at one time. make k to 2. 150k mi 128k msi. worse than v2.0
- /*
- static __thread int i, j, k, m, n;
- static __thread data_t TempA[2];
- static __thread data_t TempB[8];
-
- if(coreid == 0)
- {
- for ( j = 0; j < lda; j+=2 )
- {
- for ( m = 0; m < 16; m++ )
- {
- TempA[0] = A[j*lda + 0 + 2*m];
- TempA[1] = A[j*lda + 1 + 2*m];
- for( n = 0; n < 4; n++)
- {
-
- TempB[0] = B[2*m*lda+0+8*n];
- TempB[1] = B[2*m*lda+1+8*n];
- TempB[2] = B[2*m*lda+2+8*n];
- TempB[3] = B[2*m*lda+3+8*n];
- TempB[4] = B[2*m*lda+4+8*n];
- TempB[5] = B[2*m*lda+5+8*n];
- TempB[6] = B[2*m*lda+6+8*n];
- TempB[7] = B[2*m*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[0] * TempB[0];
- C[1+8*n+j*lda] += TempA[0] * TempB[1];
- C[2+8*n+j*lda] += TempA[0] * TempB[2];
- C[3+8*n+j*lda] += TempA[0] * TempB[3];
- C[4+8*n+j*lda] += TempA[0] * TempB[4];
- C[5+8*n+j*lda] += TempA[0] * TempB[5];
- C[6+8*n+j*lda] += TempA[0] * TempB[6];
- C[7+8*n+j*lda] += TempA[0] * TempB[7];
-
- TempB[0] = B[(1+2*m)*lda+0+8*n];
- TempB[1] = B[(1+2*m)*lda+1+8*n];
- TempB[2] = B[(1+2*m)*lda+2+8*n];
- TempB[3] = B[(1+2*m)*lda+3+8*n];
- TempB[4] = B[(1+2*m)*lda+4+8*n];
- TempB[5] = B[(1+2*m)*lda+5+8*n];
- TempB[6] = B[(1+2*m)*lda+6+8*n];
- TempB[7] = B[(1+2*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[1] * TempB[0];
- C[1+8*n+j*lda] += TempA[1] * TempB[1];
- C[2+8*n+j*lda] += TempA[1] * TempB[2];
- C[3+8*n+j*lda] += TempA[1] * TempB[3];
- C[4+8*n+j*lda] += TempA[1] * TempB[4];
- C[5+8*n+j*lda] += TempA[1] * TempB[5];
- C[6+8*n+j*lda] += TempA[1] * TempB[6];
- C[7+8*n+j*lda] += TempA[1] * TempB[7];
-
- }
-
- }
- }
- }
-
- if(coreid == 1)
- {
- for ( j = 1; j < lda; j+=2 )
- {
- for ( m = 0; m < 16; m++ )
- {
- TempA[0] = A[j*lda + 0 + 2*m];
- TempA[1] = A[j*lda + 1 + 2*m];
- for( n = 0; n < 4; n++)
- {
-
- TempB[0] = B[2*m*lda+0+8*n];
- TempB[1] = B[2*m*lda+1+8*n];
- TempB[2] = B[2*m*lda+2+8*n];
- TempB[3] = B[2*m*lda+3+8*n];
- TempB[4] = B[2*m*lda+4+8*n];
- TempB[5] = B[2*m*lda+5+8*n];
- TempB[6] = B[2*m*lda+6+8*n];
- TempB[7] = B[2*m*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[0] * TempB[0];
- C[1+8*n+j*lda] += TempA[0] * TempB[1];
- C[2+8*n+j*lda] += TempA[0] * TempB[2];
- C[3+8*n+j*lda] += TempA[0] * TempB[3];
- C[4+8*n+j*lda] += TempA[0] * TempB[4];
- C[5+8*n+j*lda] += TempA[0] * TempB[5];
- C[6+8*n+j*lda] += TempA[0] * TempB[6];
- C[7+8*n+j*lda] += TempA[0] * TempB[7];
-
- TempB[0] = B[(1+2*m)*lda+0+8*n];
- TempB[1] = B[(1+2*m)*lda+1+8*n];
- TempB[2] = B[(1+2*m)*lda+2+8*n];
- TempB[3] = B[(1+2*m)*lda+3+8*n];
- TempB[4] = B[(1+2*m)*lda+4+8*n];
- TempB[5] = B[(1+2*m)*lda+5+8*n];
- TempB[6] = B[(1+2*m)*lda+6+8*n];
- TempB[7] = B[(1+2*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[1] * TempB[0];
- C[1+8*n+j*lda] += TempA[1] * TempB[1];
- C[2+8*n+j*lda] += TempA[1] * TempB[2];
- C[3+8*n+j*lda] += TempA[1] * TempB[3];
- C[4+8*n+j*lda] += TempA[1] * TempB[4];
- C[5+8*n+j*lda] += TempA[1] * TempB[5];
- C[6+8*n+j*lda] += TempA[1] * TempB[6];
- C[7+8*n+j*lda] += TempA[1] * TempB[7];
-
- }
-
- }
- }
- }
- */
- //-------------------------------------------------------------version2.4, read 4 170k and 16 140k, error because not enough space elements in B at one time.
- /*
- static __thread int i, j, k, m, n;
- static __thread data_t TempA;
- static __thread data_t TempB[16];
-
- if(coreid == 0)
- {
- for ( j = 0; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[j*lda + k];
- for( n = 0; n < 2; n++)
- {
-
- TempB[0] = B[k*lda+0+16*n];
- TempB[1] = B[k*lda+1+16*n];
- TempB[2] = B[k*lda+2+16*n];
- TempB[3] = B[k*lda+3+16*n];
- TempB[4] = B[k*lda+4+16*n];
- TempB[5] = B[k*lda+5+16*n];
- TempB[6] = B[k*lda+6+16*n];
- TempB[7] = B[k*lda+7+16*n];
- TempB[8] = B[k*lda+8+16*n];
- TempB[9] = B[k*lda+9+16*n];
- TempB[10] = B[k*lda+10+16*n];
- TempB[11] = B[k*lda+11+16*n];
- TempB[12] = B[k*lda+12+16*n];
- TempB[13] = B[k*lda+13+16*n];
- TempB[14] = B[k*lda+14+16*n];
- TempB[15] = B[k*lda+15+16*n];
-
-
- C[0+16*n+j*lda] += TempA * TempB[0];
- C[1+16*n+j*lda] += TempA * TempB[1];
- C[2+16*n+j*lda] += TempA * TempB[2];
- C[3+16*n+j*lda] += TempA * TempB[3];
- C[4+16*n+j*lda] += TempA * TempB[4];
- C[5+16*n+j*lda] += TempA * TempB[5];
- C[6+16*n+j*lda] += TempA * TempB[6];
- C[7+16*n+j*lda] += TempA * TempB[7];
- C[8+16*n+j*lda] += TempA * TempB[8];
- C[9+16*n+j*lda] += TempA * TempB[9];
- C[10+16*n+j*lda] += TempA * TempB[10];
- C[11+16*n+j*lda] += TempA * TempB[11];
- C[12+16*n+j*lda] += TempA * TempB[12];
- C[13+16*n+j*lda] += TempA * TempB[13];
- C[14+16*n+j*lda] += TempA * TempB[14];
- C[15+16*n+j*lda] += TempA * TempB[15];
-
-
-
- }
-
- }
- }
- }
- if(coreid == 1)
- {
- for ( j = 1; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[j*lda + k];
- for( n = 0; n < 2; n++)
- {
-
- TempB[0] = B[k*lda+0+16*n];
- TempB[1] = B[k*lda+1+16*n];
- TempB[2] = B[k*lda+2+16*n];
- TempB[3] = B[k*lda+3+16*n];
- TempB[4] = B[k*lda+4+16*n];
- TempB[5] = B[k*lda+5+16*n];
- TempB[6] = B[k*lda+6+16*n];
- TempB[7] = B[k*lda+7+16*n];
- TempB[8] = B[k*lda+8+16*n];
- TempB[9] = B[k*lda+9+16*n];
- TempB[10] = B[k*lda+10+16*n];
- TempB[11] = B[k*lda+11+16*n];
- TempB[12] = B[k*lda+12+16*n];
- TempB[13] = B[k*lda+13+16*n];
- TempB[14] = B[k*lda+14+16*n];
- TempB[15] = B[k*lda+15+16*n];
-
-
- C[0+16*n+j*lda] += TempA * TempB[0];
- C[1+16*n+j*lda] += TempA * TempB[1];
- C[2+16*n+j*lda] += TempA * TempB[2];
- C[3+16*n+j*lda] += TempA * TempB[3];
- C[4+16*n+j*lda] += TempA * TempB[4];
- C[5+16*n+j*lda] += TempA * TempB[5];
- C[6+16*n+j*lda] += TempA * TempB[6];
- C[7+16*n+j*lda] += TempA * TempB[7];
- C[8+16*n+j*lda] += TempA * TempB[8];
- C[9+16*n+j*lda] += TempA * TempB[9];
- C[10+16*n+j*lda] += TempA * TempB[10];
- C[11+16*n+j*lda] += TempA * TempB[11];
- C[12+16*n+j*lda] += TempA * TempB[12];
- C[13+16*n+j*lda] += TempA * TempB[13];
- C[14+16*n+j*lda] += TempA * TempB[14];
- C[15+16*n+j*lda] += TempA * TempB[15];
-
-
-
- }
-
- }
- }
- }
-
- */
- //-------------------------------------------------------------version2.5, read 10 elements in B at one time. has corner cases. Turns out it hangs.
- /*
- static __thread int j, k, n;
- static __thread data_t TempA;
- static __thread data_t TempB[10];
-
- if(coreid == 0)
- {
- for ( j = 0; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[j*lda + k];
- for( n = 0; n < 3; n++)
- {
- TempB[0] = B[k*lda+0+10*n];
- TempB[1] = B[k*lda+1+10*n];
- TempB[2] = B[k*lda+2+10*n];
- TempB[3] = B[k*lda+3+10*n];
- TempB[4] = B[k*lda+4+10*n];
- TempB[5] = B[k*lda+5+10*n];
- TempB[6] = B[k*lda+6+10*n];
- TempB[7] = B[k*lda+7+10*n];
- TempB[8] = B[k*lda+8+10*n];
- TempB[9] = B[k*lda+9+10*n];
-
- C[0+10*n+j*lda] += TempA * TempB[0];
- C[1+10*n+j*lda] += TempA * TempB[1];
- C[2+10*n+j*lda] += TempA * TempB[2];
- C[3+10*n+j*lda] += TempA * TempB[3];
- C[4+10*n+j*lda] += TempA * TempB[4];
- C[5+10*n+j*lda] += TempA * TempB[5];
- C[6+10*n+j*lda] += TempA * TempB[6];
- C[7+10*n+j*lda] += TempA * TempB[7];
- C[8+10*n+j*lda] += TempA * TempB[8];
- C[9+10*n+j*lda] += TempA * TempB[9];
- }
- TempB[0] = B[k*lda+30];
- TempB[1] = B[k*lda+31];
- C[30+j*lda] += TempA * TempB[0];
- C[31+j*lda] += TempA * TempB[1];
- }
- }
- }
- if(coreid == 1)
- {
- for ( j = 1; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[j*lda + k];
- for( n = 0; n < 3; n++)
- {
- TempB[0] = B[k*lda+0+10*n];
- TempB[1] = B[k*lda+1+10*n];
- TempB[2] = B[k*lda+2+10*n];
- TempB[3] = B[k*lda+3+10*n];
- TempB[4] = B[k*lda+4+10*n];
- TempB[5] = B[k*lda+5+10*n];
- TempB[6] = B[k*lda+6+10*n];
- TempB[7] = B[k*lda+7+10*n];
- TempB[8] = B[k*lda+8+10*n];
- TempB[9] = B[k*lda+9+10*n];
-
- C[0+10*n+j*lda] += TempA * TempB[0];
- C[1+10*n+j*lda] += TempA * TempB[1];
- C[2+10*n+j*lda] += TempA * TempB[2];
- C[3+10*n+j*lda] += TempA * TempB[3];
- C[4+10*n+j*lda] += TempA * TempB[4];
- C[5+10*n+j*lda] += TempA * TempB[5];
- C[6+10*n+j*lda] += TempA * TempB[6];
- C[7+10*n+j*lda] += TempA * TempB[7];
- C[8+10*n+j*lda] += TempA * TempB[8];
- C[9+10*n+j*lda] += TempA * TempB[9];
- }
- TempB[0] = B[k*lda+30];
- TempB[1] = B[k*lda+31];
- C[30+j*lda] += TempA * TempB[0];
- C[31+j*lda] += TempA * TempB[1];
- }
- }
- }
-
- */
-
- //-------------------------------------------------------------version2.6, optimize 2.0. take off n loop and tried different order of reading B
- /*
- static __thread int j, k, n;
- static __thread data_t TempA;
- static __thread data_t TempB[8];
-
- if(coreid == 0)
- {
- for ( j = 0; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[j*lda + k];
-
- TempB[0] = B[k*lda+0];
- TempB[1] = B[k*lda+1];
- TempB[2] = B[k*lda+2];
- TempB[3] = B[k*lda+3];
- TempB[4] = B[k*lda+4];
- TempB[5] = B[k*lda+5];
- TempB[6] = B[k*lda+6];
- TempB[7] = B[k*lda+7];
-
- C[0+j*lda] += TempA * TempB[0];
- C[1+j*lda] += TempA * TempB[1];
- C[2+j*lda] += TempA * TempB[2];
- C[3+j*lda] += TempA * TempB[3];
- C[4+j*lda] += TempA * TempB[4];
- C[5+j*lda] += TempA * TempB[5];
- C[6+j*lda] += TempA * TempB[6];
- C[7+j*lda] += TempA * TempB[7];
-
- TempB[0] = B[k*lda+8];
- TempB[1] = B[k*lda+9];
- TempB[2] = B[k*lda+10];
- TempB[3] = B[k*lda+11];
- TempB[4] = B[k*lda+12];
- TempB[5] = B[k*lda+13];
- TempB[6] = B[k*lda+14];
- TempB[7] = B[k*lda+15];
-
- C[8+j*lda] += TempA * TempB[0];
- C[9+j*lda] += TempA * TempB[1];
- C[10+j*lda] += TempA * TempB[2];
- C[11+j*lda] += TempA * TempB[3];
- C[12+j*lda] += TempA * TempB[4];
- C[13+j*lda] += TempA * TempB[5];
- C[14+j*lda] += TempA * TempB[6];
- C[15+j*lda] += TempA * TempB[7];
-
- TempB[0] = B[k*lda+16];
- TempB[1] = B[k*lda+17];
- TempB[2] = B[k*lda+18];
- TempB[3] = B[k*lda+19];
- TempB[4] = B[k*lda+20];
- TempB[5] = B[k*lda+21];
- TempB[6] = B[k*lda+22];
- TempB[7] = B[k*lda+23];
-
- C[16+j*lda] += TempA * TempB[0];
- C[17+j*lda] += TempA * TempB[1];
- C[18+j*lda] += TempA * TempB[2];
- C[19+j*lda] += TempA * TempB[3];
- C[20+j*lda] += TempA * TempB[4];
- C[21+j*lda] += TempA * TempB[5];
- C[22+j*lda] += TempA * TempB[6];
- C[23+j*lda] += TempA * TempB[7];
-
- TempB[0] = B[k*lda+24];
- TempB[1] = B[k*lda+25];
- TempB[2] = B[k*lda+26];
- TempB[3] = B[k*lda+27];
- TempB[4] = B[k*lda+28];
- TempB[5] = B[k*lda+29];
- TempB[6] = B[k*lda+30];
- TempB[7] = B[k*lda+31];
-
- C[24+j*lda] += TempA * TempB[0];
- C[25+j*lda] += TempA * TempB[1];
- C[26+j*lda] += TempA * TempB[2];
- C[27+j*lda] += TempA * TempB[3];
- C[28+j*lda] += TempA * TempB[4];
- C[29+j*lda] += TempA * TempB[5];
- C[30+j*lda] += TempA * TempB[6];
- C[31+j*lda] += TempA * TempB[7];
-
-
-
- }
- }
- }
-
- if(coreid == 1)
- {
- for ( j = 1; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[j*lda + k];
-
-
- TempB[0] = B[k*lda+24];
- TempB[1] = B[k*lda+25];
- TempB[2] = B[k*lda+26];
- TempB[3] = B[k*lda+27];
- TempB[4] = B[k*lda+28];
- TempB[5] = B[k*lda+29];
- TempB[6] = B[k*lda+30];
- TempB[7] = B[k*lda+31];
-
- C[24+j*lda] += TempA * TempB[0];
- C[25+j*lda] += TempA * TempB[1];
- C[26+j*lda] += TempA * TempB[2];
- C[27+j*lda] += TempA * TempB[3];
- C[28+j*lda] += TempA * TempB[4];
- C[29+j*lda] += TempA * TempB[5];
- C[30+j*lda] += TempA * TempB[6];
- C[31+j*lda] += TempA * TempB[7];
-
- TempB[0] = B[k*lda+0];
- TempB[1] = B[k*lda+1];
- TempB[2] = B[k*lda+2];
- TempB[3] = B[k*lda+3];
- TempB[4] = B[k*lda+4];
- TempB[5] = B[k*lda+5];
- TempB[6] = B[k*lda+6];
- TempB[7] = B[k*lda+7];
-
- C[0+j*lda] += TempA * TempB[0];
- C[1+j*lda] += TempA * TempB[1];
- C[2+j*lda] += TempA * TempB[2];
- C[3+j*lda] += TempA * TempB[3];
- C[4+j*lda] += TempA * TempB[4];
- C[5+j*lda] += TempA * TempB[5];
- C[6+j*lda] += TempA * TempB[6];
- C[7+j*lda] += TempA * TempB[7];
-
- TempB[0] = B[k*lda+8];
- TempB[1] = B[k*lda+9];
- TempB[2] = B[k*lda+10];
- TempB[3] = B[k*lda+11];
- TempB[4] = B[k*lda+12];
- TempB[5] = B[k*lda+13];
- TempB[6] = B[k*lda+14];
- TempB[7] = B[k*lda+15];
-
- C[8+j*lda] += TempA * TempB[0];
- C[9+j*lda] += TempA * TempB[1];
- C[10+j*lda] += TempA * TempB[2];
- C[11+j*lda] += TempA * TempB[3];
- C[12+j*lda] += TempA * TempB[4];
- C[13+j*lda] += TempA * TempB[5];
- C[14+j*lda] += TempA * TempB[6];
- C[15+j*lda] += TempA * TempB[7];
-
- TempB[0] = B[k*lda+16];
- TempB[1] = B[k*lda+17];
- TempB[2] = B[k*lda+18];
- TempB[3] = B[k*lda+19];
- TempB[4] = B[k*lda+20];
- TempB[5] = B[k*lda+21];
- TempB[6] = B[k*lda+22];
- TempB[7] = B[k*lda+23];
-
- C[16+j*lda] += TempA * TempB[0];
- C[17+j*lda] += TempA * TempB[1];
- C[18+j*lda] += TempA * TempB[2];
- C[19+j*lda] += TempA * TempB[3];
- C[20+j*lda] += TempA * TempB[4];
- C[21+j*lda] += TempA * TempB[5];
- C[22+j*lda] += TempA * TempB[6];
- C[23+j*lda] += TempA * TempB[7];
-
-
-
-
-
-
- }
- }
- }
- */
- //-------------------------------------------------------------version2.7, use m=l*da, i=k*lda,out of stack, only i, MI 150k, only m, MSI 117.9k slower than v2.0
- /*
- static __thread int i, j, k, m, n;
- static __thread data_t TempA;
- static __thread data_t TempB[8];
-
- if(coreid == 0)
- {
- for ( j = 0; j < lda; j+=2 )
- {
- m = j * lda;
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[m+ k];
- for( n = 0; n < 4; n++)
- {
-
- TempB[0] = B[k *lda+0+8*n];
- TempB[1] = B[k *lda+1+8*n];
- TempB[2] = B[k *lda+2+8*n];
- TempB[3] = B[k *lda+3+8*n];
- TempB[4] = B[k *lda+4+8*n];
- TempB[5] = B[k *lda+5+8*n];
- TempB[6] = B[k *lda+6+8*n];
- TempB[7] = B[k *lda+7+8*n];
-
- C[0+8*n+m] += TempA * TempB[0];
- C[1+8*n+m] += TempA * TempB[1];
- C[2+8*n+m] += TempA * TempB[2];
- C[3+8*n+m] += TempA * TempB[3];
- C[4+8*n+m] += TempA * TempB[4];
- C[5+8*n+m] += TempA * TempB[5];
- C[6+8*n+m] += TempA * TempB[6];
- C[7+8*n+m] += TempA * TempB[7];
-
- }
-
- }
- }
- }
-if(coreid == 1)
- {
- for ( j = 1; j < lda; j+=2 )
- {
- m = j * lda;
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[m+ k];
- for( n = 0; n < 4; n++)
- {
-
- TempB[0] = B[k *lda+0+8*n];
- TempB[1] = B[k *lda+1+8*n];
- TempB[2] = B[k *lda+2+8*n];
- TempB[3] = B[k *lda+3+8*n];
- TempB[4] = B[k *lda+4+8*n];
- TempB[5] = B[k *lda+5+8*n];
- TempB[6] = B[k *lda+6+8*n];
- TempB[7] = B[k *lda+7+8*n];
-
- C[0+8*n+m] += TempA * TempB[0];
- C[1+8*n+m] += TempA * TempB[1];
- C[2+8*n+m] += TempA * TempB[2];
- C[3+8*n+m] += TempA * TempB[3];
- C[4+8*n+m] += TempA * TempB[4];
- C[5+8*n+m] += TempA * TempB[5];
- C[6+8*n+m] += TempA * TempB[6];
- C[7+8*n+m] += TempA * TempB[7];
-
- }
-
- }
- }
- }
- */
-//-------------------------------------------------------------version2.8 deal with false sharing, MSI,118K vs v2.0 117.0K. MI 147.629K.
-/*
-static __thread int i, j, k, m, n;
- static __thread data_t TempA;
- static __thread data_t TempB[8];
-
- if(coreid == 0)
- {
- for ( j = 0; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[j*lda + k];
- for( n = 0; n < 2; n++)
- {
-
- TempB[0] = B[k*lda+0+16*n];
- TempB[1] = B[k*lda+1+16*n];
- TempB[2] = B[k*lda+2+16*n];
- TempB[3] = B[k*lda+3+16*n];
- TempB[4] = B[k*lda+4+16*n];
- TempB[5] = B[k*lda+5+16*n];
- TempB[6] = B[k*lda+6+16*n];
- TempB[7] = B[k*lda+7+16*n];
-
-
-
- C[0+16*n+j*lda] += TempA * TempB[0];
- C[1+16*n+j*lda] += TempA * TempB[1];
- C[2+16*n+j*lda] += TempA * TempB[2];
- C[3+16*n+j*lda] += TempA * TempB[3];
- C[4+16*n+j*lda] += TempA * TempB[4];
- C[5+16*n+j*lda] += TempA * TempB[5];
- C[6+16*n+j*lda] += TempA * TempB[6];
- C[7+16*n+j*lda] += TempA * TempB[7];
-
- TempB[0] = B[k*lda+8+16*n];
- TempB[1] = B[k*lda+9+16*n];
- TempB[2] = B[k*lda+10+16*n];
- TempB[3] = B[k*lda+11+16*n];
- TempB[4] = B[k*lda+12+16*n];
- TempB[5] = B[k*lda+13+16*n];
- TempB[6] = B[k*lda+14+16*n];
- TempB[7] = B[k*lda+15+16*n];
-
- C[8+16*n+j*lda] += TempA * TempB[0];
- C[9+16*n+j*lda] += TempA * TempB[1];
- C[10+16*n+j*lda] += TempA * TempB[2];
- C[11+16*n+j*lda] += TempA * TempB[3];
- C[12+16*n+j*lda] += TempA * TempB[4];
- C[13+16*n+j*lda] += TempA * TempB[5];
- C[14+16*n+j*lda] += TempA * TempB[6];
- C[15+16*n+j*lda] += TempA * TempB[7];
-
-
-
- }
-
- }
- }
- }
- if(coreid == 1)
- {
- for ( j = 1; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[j*lda + k];
- for( n = 0; n < 2; n++)
- {
-
-
-
- TempB[0] = B[k*lda+8+16*n];
- TempB[1] = B[k*lda+9+16*n];
- TempB[2] = B[k*lda+10+16*n];
- TempB[3] = B[k*lda+11+16*n];
- TempB[4] = B[k*lda+12+16*n];
- TempB[5] = B[k*lda+13+16*n];
- TempB[6] = B[k*lda+14+16*n];
- TempB[7] = B[k*lda+15+16*n];
-
- C[8+16*n+j*lda] += TempA * TempB[0];
- C[9+16*n+j*lda] += TempA * TempB[1];
- C[10+16*n+j*lda] += TempA * TempB[2];
- C[11+16*n+j*lda] += TempA * TempB[3];
- C[12+16*n+j*lda] += TempA * TempB[4];
- C[13+16*n+j*lda] += TempA * TempB[5];
- C[14+16*n+j*lda] += TempA * TempB[6];
- C[15+16*n+j*lda] += TempA * TempB[7];
-
- TempB[0] = B[k*lda+0+16*n];
- TempB[1] = B[k*lda+1+16*n];
- TempB[2] = B[k*lda+2+16*n];
- TempB[3] = B[k*lda+3+16*n];
- TempB[4] = B[k*lda+4+16*n];
- TempB[5] = B[k*lda+5+16*n];
- TempB[6] = B[k*lda+6+16*n];
- TempB[7] = B[k*lda+7+16*n];
-
-
-
- C[0+16*n+j*lda] += TempA * TempB[0];
- C[1+16*n+j*lda] += TempA * TempB[1];
- C[2+16*n+j*lda] += TempA * TempB[2];
- C[3+16*n+j*lda] += TempA * TempB[3];
- C[4+16*n+j*lda] += TempA * TempB[4];
- C[5+16*n+j*lda] += TempA * TempB[5];
- C[6+16*n+j*lda] += TempA * TempB[6];
- C[7+16*n+j*lda] += TempA * TempB[7];
-
-
- }
-
- }
- }
- }
- */
-
- //----------------------------------------------------------------version 2.11 optmize j,use core 1 j from 0 to 15 MSI 98k i = j*lda
- //----------------------------------------------------------------version 2.12 not use i = j *lda MSI 95k
- /*
- static __thread data_t TempA[8];
- static __thread data_t TempB[8];
- static __thread int j,m,n,i,k;
-
- if(coreid == 1)
- {
- for ( j = 16; j < 32; j++ )
- {
-
- for ( m = 0; m < 4; m++ )
- {
-
- TempA[0] = A[j*lda+0+8*m];
- TempA[1] = A[j*lda+1+8*m];
- TempA[2] = A[j*lda+2+8*m];
- TempA[3] = A[j*lda+3+8*m];
- TempA[4] = A[j*lda+4+8*m];
- TempA[5] = A[j*lda+5+8*m];
- TempA[6] = A[j*lda+6+8*m];
- TempA[7] = A[j*lda+7+8*m];
-
- for( n = 0; n < 4; n++)
- {
-
- TempB[0] = B[(0+8*m)*lda+0+8*n];
- TempB[1] = B[(0+8*m)*lda+1+8*n];
- TempB[2] = B[(0+8*m)*lda+2+8*n];
- TempB[3] = B[(0+8*m)*lda+3+8*n];
- TempB[4] = B[(0+8*m)*lda+4+8*n];
- TempB[5] = B[(0+8*m)*lda+5+8*n];
- TempB[6] = B[(0+8*m)*lda+6+8*n];
- TempB[7] = B[(0+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[0] * TempB[0];
- C[1+8*n+j*lda] += TempA[0] * TempB[1];
- C[2+8*n+j*lda] += TempA[0] * TempB[2];
- C[3+8*n+j*lda] += TempA[0] * TempB[3];
- C[4+8*n+j*lda] += TempA[0] * TempB[4];
- C[5+8*n+j*lda] += TempA[0] * TempB[5];
- C[6+8*n+j*lda] += TempA[0] * TempB[6];
- C[7+8*n+j*lda] += TempA[0] * TempB[7];
-
-
-
- TempB[0] = B[(1+8*m)*lda+0+8*n];
- TempB[1] = B[(1+8*m)*lda+1+8*n];
- TempB[2] = B[(1+8*m)*lda+2+8*n];
- TempB[3] = B[(1+8*m)*lda+3+8*n];
- TempB[4] = B[(1+8*m)*lda+4+8*n];
- TempB[5] = B[(1+8*m)*lda+5+8*n];
- TempB[6] = B[(1+8*m)*lda+6+8*n];
- TempB[7] = B[(1+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[1] * TempB[0];
- C[1+8*n+j*lda] += TempA[1] * TempB[1];
- C[2+8*n+j*lda] += TempA[1] * TempB[2];
- C[3+8*n+j*lda] += TempA[1] * TempB[3];
- C[4+8*n+j*lda] += TempA[1] * TempB[4];
- C[5+8*n+j*lda] += TempA[1] * TempB[5];
- C[6+8*n+j*lda] += TempA[1] * TempB[6];
- C[7+8*n+j*lda] += TempA[1] * TempB[7];
-
-
-
- TempB[0] = B[(2+8*m)*lda+0+8*n];
- TempB[1] = B[(2+8*m)*lda+1+8*n];
- TempB[2] = B[(2+8*m)*lda+2+8*n];
- TempB[3] = B[(2+8*m)*lda+3+8*n];
- TempB[4] = B[(2+8*m)*lda+4+8*n];
- TempB[5] = B[(2+8*m)*lda+5+8*n];
- TempB[6] = B[(2+8*m)*lda+6+8*n];
- TempB[7] = B[(2+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[2] * TempB[0];
- C[1+8*n+j*lda] += TempA[2] * TempB[1];
- C[2+8*n+j*lda] += TempA[2] * TempB[2];
- C[3+8*n+j*lda] += TempA[2] * TempB[3];
- C[4+8*n+j*lda] += TempA[2] * TempB[4];
- C[5+8*n+j*lda] += TempA[2] * TempB[5];
- C[6+8*n+j*lda] += TempA[2] * TempB[6];
- C[7+8*n+j*lda] += TempA[2] * TempB[7];
-
-
-
- TempB[0] = B[(3+8*m)*lda+0+8*n];
- TempB[1] = B[(3+8*m)*lda+1+8*n];
- TempB[2] = B[(3+8*m)*lda+2+8*n];
- TempB[3] = B[(3+8*m)*lda+3+8*n];
- TempB[4] = B[(3+8*m)*lda+4+8*n];
- TempB[5] = B[(3+8*m)*lda+5+8*n];
- TempB[6] = B[(3+8*m)*lda+6+8*n];
- TempB[7] = B[(3+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[3] * TempB[0];
- C[1+8*n+j*lda] += TempA[3] * TempB[1];
- C[2+8*n+j*lda] += TempA[3] * TempB[2];
- C[3+8*n+j*lda] += TempA[3] * TempB[3];
- C[4+8*n+j*lda] += TempA[3] * TempB[4];
- C[5+8*n+j*lda] += TempA[3] * TempB[5];
- C[6+8*n+j*lda] += TempA[3] * TempB[6];
- C[7+8*n+j*lda] += TempA[3] * TempB[7];
-
-
- TempB[0] = B[(4+8*m)*lda+0+8*n];
- TempB[1] = B[(4+8*m)*lda+1+8*n];
- TempB[2] = B[(4+8*m)*lda+2+8*n];
- TempB[3] = B[(4+8*m)*lda+3+8*n];
- TempB[4] = B[(4+8*m)*lda+4+8*n];
- TempB[5] = B[(4+8*m)*lda+5+8*n];
- TempB[6] = B[(4+8*m)*lda+6+8*n];
- TempB[7] = B[(4+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[4] * TempB[0];
- C[1+8*n+j*lda] += TempA[4] * TempB[1];
- C[2+8*n+j*lda] += TempA[4] * TempB[2];
- C[3+8*n+j*lda] += TempA[4] * TempB[3];
- C[4+8*n+j*lda] += TempA[4] * TempB[4];
- C[5+8*n+j*lda] += TempA[4] * TempB[5];
- C[6+8*n+j*lda] += TempA[4] * TempB[6];
- C[7+8*n+j*lda] += TempA[4] * TempB[7];
-
-
-
- TempB[0] = B[(5+8*m)*lda+0+8*n];
- TempB[1] = B[(5+8*m)*lda+1+8*n];
- TempB[2] = B[(5+8*m)*lda+2+8*n];
- TempB[3] = B[(5+8*m)*lda+3+8*n];
- TempB[4] = B[(5+8*m)*lda+4+8*n];
- TempB[5] = B[(5+8*m)*lda+5+8*n];
- TempB[6] = B[(5+8*m)*lda+6+8*n];
- TempB[7] = B[(5+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[5] * TempB[0];
- C[1+8*n+j*lda] += TempA[5] * TempB[1];
- C[2+8*n+j*lda] += TempA[5] * TempB[2];
- C[3+8*n+j*lda] += TempA[5] * TempB[3];
- C[4+8*n+j*lda] += TempA[5] * TempB[4];
- C[5+8*n+j*lda] += TempA[5] * TempB[5];
- C[6+8*n+j*lda] += TempA[5] * TempB[6];
- C[7+8*n+j*lda] += TempA[5] * TempB[7];
-
-
-
- TempB[0] = B[(6+8*m)*lda+0+8*n];
- TempB[1] = B[(6+8*m)*lda+1+8*n];
- TempB[2] = B[(6+8*m)*lda+2+8*n];
- TempB[3] = B[(6+8*m)*lda+3+8*n];
- TempB[4] = B[(6+8*m)*lda+4+8*n];
- TempB[5] = B[(6+8*m)*lda+5+8*n];
- TempB[6] = B[(6+8*m)*lda+6+8*n];
- TempB[7] = B[(6+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[6] * TempB[0];
- C[1+8*n+j*lda] += TempA[6] * TempB[1];
- C[2+8*n+j*lda] += TempA[6] * TempB[2];
- C[3+8*n+j*lda] += TempA[6] * TempB[3];
- C[4+8*n+j*lda] += TempA[6] * TempB[4];
- C[5+8*n+j*lda] += TempA[6] * TempB[5];
- C[6+8*n+j*lda] += TempA[6] * TempB[6];
- C[7+8*n+j*lda] += TempA[6] * TempB[7];
-
-
- TempB[0] = B[(7+8*m)*lda+0+8*n];
- TempB[1] = B[(7+8*m)*lda+1+8*n];
- TempB[2] = B[(7+8*m)*lda+2+8*n];
- TempB[3] = B[(7+8*m)*lda+3+8*n];
- TempB[4] = B[(7+8*m)*lda+4+8*n];
- TempB[5] = B[(7+8*m)*lda+5+8*n];
- TempB[6] = B[(7+8*m)*lda+6+8*n];
- TempB[7] = B[(7+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[7] * TempB[0];
- C[1+8*n+j*lda] += TempA[7] * TempB[1];
- C[2+8*n+j*lda] += TempA[7] * TempB[2];
- C[3+8*n+j*lda] += TempA[7] * TempB[3];
- C[4+8*n+j*lda] += TempA[7] * TempB[4];
- C[5+8*n+j*lda] += TempA[7] * TempB[5];
- C[6+8*n+j*lda] += TempA[7] * TempB[6];
- C[7+8*n+j*lda] += TempA[7] * TempB[7];
- }
-
- }
- }
- }
- if(coreid ==0)
- {
- for ( j = 0; j < 16; j++ )
- {
-
- for ( m = 0; m < 4; m++ )
- {
-
- TempA[0] = A[j*lda+0+8*m];
- TempA[1] = A[j*lda+1+8*m];
- TempA[2] = A[j*lda+2+8*m];
- TempA[3] = A[j*lda+3+8*m];
- TempA[4] = A[j*lda+4+8*m];
- TempA[5] = A[j*lda+5+8*m];
- TempA[6] = A[j*lda+6+8*m];
- TempA[7] = A[j*lda+7+8*m];
-
- for( n = 0; n < 4; n++)
- {
-
- TempB[0] = B[(0+8*m)*lda+0+8*n];
- TempB[1] = B[(0+8*m)*lda+1+8*n];
- TempB[2] = B[(0+8*m)*lda+2+8*n];
- TempB[3] = B[(0+8*m)*lda+3+8*n];
- TempB[4] = B[(0+8*m)*lda+4+8*n];
- TempB[5] = B[(0+8*m)*lda+5+8*n];
- TempB[6] = B[(0+8*m)*lda+6+8*n];
- TempB[7] = B[(0+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[0] * TempB[0];
- C[1+8*n+j*lda] += TempA[0] * TempB[1];
- C[2+8*n+j*lda] += TempA[0] * TempB[2];
- C[3+8*n+j*lda] += TempA[0] * TempB[3];
- C[4+8*n+j*lda] += TempA[0] * TempB[4];
- C[5+8*n+j*lda] += TempA[0] * TempB[5];
- C[6+8*n+j*lda] += TempA[0] * TempB[6];
- C[7+8*n+j*lda] += TempA[0] * TempB[7];
-
-
-
- TempB[0] = B[(1+8*m)*lda+0+8*n];
- TempB[1] = B[(1+8*m)*lda+1+8*n];
- TempB[2] = B[(1+8*m)*lda+2+8*n];
- TempB[3] = B[(1+8*m)*lda+3+8*n];
- TempB[4] = B[(1+8*m)*lda+4+8*n];
- TempB[5] = B[(1+8*m)*lda+5+8*n];
- TempB[6] = B[(1+8*m)*lda+6+8*n];
- TempB[7] = B[(1+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[1] * TempB[0];
- C[1+8*n+j*lda] += TempA[1] * TempB[1];
- C[2+8*n+j*lda] += TempA[1] * TempB[2];
- C[3+8*n+j*lda] += TempA[1] * TempB[3];
- C[4+8*n+j*lda] += TempA[1] * TempB[4];
- C[5+8*n+j*lda] += TempA[1] * TempB[5];
- C[6+8*n+j*lda] += TempA[1] * TempB[6];
- C[7+8*n+j*lda] += TempA[1] * TempB[7];
-
-
-
- TempB[0] = B[(2+8*m)*lda+0+8*n];
- TempB[1] = B[(2+8*m)*lda+1+8*n];
- TempB[2] = B[(2+8*m)*lda+2+8*n];
- TempB[3] = B[(2+8*m)*lda+3+8*n];
- TempB[4] = B[(2+8*m)*lda+4+8*n];
- TempB[5] = B[(2+8*m)*lda+5+8*n];
- TempB[6] = B[(2+8*m)*lda+6+8*n];
- TempB[7] = B[(2+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[2] * TempB[0];
- C[1+8*n+j*lda] += TempA[2] * TempB[1];
- C[2+8*n+j*lda] += TempA[2] * TempB[2];
- C[3+8*n+j*lda] += TempA[2] * TempB[3];
- C[4+8*n+j*lda] += TempA[2] * TempB[4];
- C[5+8*n+j*lda] += TempA[2] * TempB[5];
- C[6+8*n+j*lda] += TempA[2] * TempB[6];
- C[7+8*n+j*lda] += TempA[2] * TempB[7];
-
-
-
- TempB[0] = B[(3+8*m)*lda+0+8*n];
- TempB[1] = B[(3+8*m)*lda+1+8*n];
- TempB[2] = B[(3+8*m)*lda+2+8*n];
- TempB[3] = B[(3+8*m)*lda+3+8*n];
- TempB[4] = B[(3+8*m)*lda+4+8*n];
- TempB[5] = B[(3+8*m)*lda+5+8*n];
- TempB[6] = B[(3+8*m)*lda+6+8*n];
- TempB[7] = B[(3+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[3] * TempB[0];
- C[1+8*n+j*lda] += TempA[3] * TempB[1];
- C[2+8*n+j*lda] += TempA[3] * TempB[2];
- C[3+8*n+j*lda] += TempA[3] * TempB[3];
- C[4+8*n+j*lda] += TempA[3] * TempB[4];
- C[5+8*n+j*lda] += TempA[3] * TempB[5];
- C[6+8*n+j*lda] += TempA[3] * TempB[6];
- C[7+8*n+j*lda] += TempA[3] * TempB[7];
-
-
- TempB[0] = B[(4+8*m)*lda+0+8*n];
- TempB[1] = B[(4+8*m)*lda+1+8*n];
- TempB[2] = B[(4+8*m)*lda+2+8*n];
- TempB[3] = B[(4+8*m)*lda+3+8*n];
- TempB[4] = B[(4+8*m)*lda+4+8*n];
- TempB[5] = B[(4+8*m)*lda+5+8*n];
- TempB[6] = B[(4+8*m)*lda+6+8*n];
- TempB[7] = B[(4+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[4] * TempB[0];
- C[1+8*n+j*lda] += TempA[4] * TempB[1];
- C[2+8*n+j*lda] += TempA[4] * TempB[2];
- C[3+8*n+j*lda] += TempA[4] * TempB[3];
- C[4+8*n+j*lda] += TempA[4] * TempB[4];
- C[5+8*n+j*lda] += TempA[4] * TempB[5];
- C[6+8*n+j*lda] += TempA[4] * TempB[6];
- C[7+8*n+j*lda] += TempA[4] * TempB[7];
-
-
-
- TempB[0] = B[(5+8*m)*lda+0+8*n];
- TempB[1] = B[(5+8*m)*lda+1+8*n];
- TempB[2] = B[(5+8*m)*lda+2+8*n];
- TempB[3] = B[(5+8*m)*lda+3+8*n];
- TempB[4] = B[(5+8*m)*lda+4+8*n];
- TempB[5] = B[(5+8*m)*lda+5+8*n];
- TempB[6] = B[(5+8*m)*lda+6+8*n];
- TempB[7] = B[(5+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[5] * TempB[0];
- C[1+8*n+j*lda] += TempA[5] * TempB[1];
- C[2+8*n+j*lda] += TempA[5] * TempB[2];
- C[3+8*n+j*lda] += TempA[5] * TempB[3];
- C[4+8*n+j*lda] += TempA[5] * TempB[4];
- C[5+8*n+j*lda] += TempA[5] * TempB[5];
- C[6+8*n+j*lda] += TempA[5] * TempB[6];
- C[7+8*n+j*lda] += TempA[5] * TempB[7];
-
-
-
- TempB[0] = B[(6+8*m)*lda+0+8*n];
- TempB[1] = B[(6+8*m)*lda+1+8*n];
- TempB[2] = B[(6+8*m)*lda+2+8*n];
- TempB[3] = B[(6+8*m)*lda+3+8*n];
- TempB[4] = B[(6+8*m)*lda+4+8*n];
- TempB[5] = B[(6+8*m)*lda+5+8*n];
- TempB[6] = B[(6+8*m)*lda+6+8*n];
- TempB[7] = B[(6+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[6] * TempB[0];
- C[1+8*n+j*lda] += TempA[6] * TempB[1];
- C[2+8*n+j*lda] += TempA[6] * TempB[2];
- C[3+8*n+j*lda] += TempA[6] * TempB[3];
- C[4+8*n+j*lda] += TempA[6] * TempB[4];
- C[5+8*n+j*lda] += TempA[6] * TempB[5];
- C[6+8*n+j*lda] += TempA[6] * TempB[6];
- C[7+8*n+j*lda] += TempA[6] * TempB[7];
-
-
- TempB[0] = B[(7+8*m)*lda+0+8*n];
- TempB[1] = B[(7+8*m)*lda+1+8*n];
- TempB[2] = B[(7+8*m)*lda+2+8*n];
- TempB[3] = B[(7+8*m)*lda+3+8*n];
- TempB[4] = B[(7+8*m)*lda+4+8*n];
- TempB[5] = B[(7+8*m)*lda+5+8*n];
- TempB[6] = B[(7+8*m)*lda+6+8*n];
- TempB[7] = B[(7+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[7] * TempB[0];
- C[1+8*n+j*lda] += TempA[7] * TempB[1];
- C[2+8*n+j*lda] += TempA[7] * TempB[2];
- C[3+8*n+j*lda] += TempA[7] * TempB[3];
- C[4+8*n+j*lda] += TempA[7] * TempB[4];
- C[5+8*n+j*lda] += TempA[7] * TempB[5];
- C[6+8*n+j*lda] += TempA[7] * TempB[6];
- C[7+8*n+j*lda] += TempA[7] * TempB[7];
- }
-
- }
- }
- }
- */
- //-----------------------------------------------------------------version 2.14 optimize C. when tempc[8] inside n loop, MSI, 98K MI,158k
- //-----------------------------------------------------------------version 2.15 optimize v2.14 a little MSI 89k. MI, 161K. don't decare tempc[8]=0 in the loop
- /*
- static __thread data_t TempA[8];
- static __thread data_t TempB[8];
- static __thread data_t TempC[8];
- static __thread int j,m,n,i,k;
-
- if(coreid == 1)
- {
- for ( j = 16; j < 32; j++ )
- {
-
- for ( m = 0; m < 4; m++ )
- {
-
- TempA[0] = A[j*lda+0+8*m];
- TempA[1] = A[j*lda+1+8*m];
- TempA[2] = A[j*lda+2+8*m];
- TempA[3] = A[j*lda+3+8*m];
- TempA[4] = A[j*lda+4+8*m];
- TempA[5] = A[j*lda+5+8*m];
- TempA[6] = A[j*lda+6+8*m];
- TempA[7] = A[j*lda+7+8*m];
-
-
-
- for( n = 0; n < 4; n++)
- {
-
-
- TempB[0] = B[(0+8*m)*lda+0+8*n];
- TempB[1] = B[(0+8*m)*lda+1+8*n];
- TempB[2] = B[(0+8*m)*lda+2+8*n];
- TempB[3] = B[(0+8*m)*lda+3+8*n];
- TempB[4] = B[(0+8*m)*lda+4+8*n];
- TempB[5] = B[(0+8*m)*lda+5+8*n];
- TempB[6] = B[(0+8*m)*lda+6+8*n];
- TempB[7] = B[(0+8*m)*lda+7+8*n];
-
-
- TempC[0] = TempA[0] * TempB[0];
- TempC[1] = TempA[0] * TempB[1];
- TempC[2] = TempA[0] * TempB[2];
- TempC[3] = TempA[0] * TempB[3];
- TempC[4] = TempA[0] * TempB[4];
- TempC[5] = TempA[0] * TempB[5];
- TempC[6] = TempA[0] * TempB[6];
- TempC[7] = TempA[0] * TempB[7];
-
-
-
- TempB[0] = B[(1+8*m)*lda+0+8*n];
- TempB[1] = B[(1+8*m)*lda+1+8*n];
- TempB[2] = B[(1+8*m)*lda+2+8*n];
- TempB[3] = B[(1+8*m)*lda+3+8*n];
- TempB[4] = B[(1+8*m)*lda+4+8*n];
- TempB[5] = B[(1+8*m)*lda+5+8*n];
- TempB[6] = B[(1+8*m)*lda+6+8*n];
- TempB[7] = B[(1+8*m)*lda+7+8*n];
-
- TempC[0] += TempA[1] * TempB[0];
- TempC[1] += TempA[1] * TempB[1];
- TempC[2] += TempA[1] * TempB[2];
- TempC[3] += TempA[1] * TempB[3];
- TempC[4] += TempA[1] * TempB[4];
- TempC[5] += TempA[1] * TempB[5];
- TempC[6] += TempA[1] * TempB[6];
- TempC[7] += TempA[1] * TempB[7];
-
-
-
- TempB[0] = B[(2+8*m)*lda+0+8*n];
- TempB[1] = B[(2+8*m)*lda+1+8*n];
- TempB[2] = B[(2+8*m)*lda+2+8*n];
- TempB[3] = B[(2+8*m)*lda+3+8*n];
- TempB[4] = B[(2+8*m)*lda+4+8*n];
- TempB[5] = B[(2+8*m)*lda+5+8*n];
- TempB[6] = B[(2+8*m)*lda+6+8*n];
- TempB[7] = B[(2+8*m)*lda+7+8*n];
-
- TempC[0] += TempA[2] * TempB[0];
- TempC[1] += TempA[2] * TempB[1];
- TempC[2] += TempA[2] * TempB[2];
- TempC[3] += TempA[2] * TempB[3];
- TempC[4] += TempA[2] * TempB[4];
- TempC[5] += TempA[2] * TempB[5];
- TempC[6] += TempA[2] * TempB[6];
- TempC[7] += TempA[2] * TempB[7];
-
-
-
- TempB[0] = B[(3+8*m)*lda+0+8*n];
- TempB[1] = B[(3+8*m)*lda+1+8*n];
- TempB[2] = B[(3+8*m)*lda+2+8*n];
- TempB[3] = B[(3+8*m)*lda+3+8*n];
- TempB[4] = B[(3+8*m)*lda+4+8*n];
- TempB[5] = B[(3+8*m)*lda+5+8*n];
- TempB[6] = B[(3+8*m)*lda+6+8*n];
- TempB[7] = B[(3+8*m)*lda+7+8*n];
-
- TempC[0] += TempA[3] * TempB[0];
- TempC[1] += TempA[3] * TempB[1];
- TempC[2] += TempA[3] * TempB[2];
- TempC[3] += TempA[3] * TempB[3];
- TempC[4] += TempA[3] * TempB[4];
- TempC[5] += TempA[3] * TempB[5];
- TempC[6] += TempA[3] * TempB[6];
- TempC[7] += TempA[3] * TempB[7];
-
-
- TempB[0] = B[(4+8*m)*lda+0+8*n];
- TempB[1] = B[(4+8*m)*lda+1+8*n];
- TempB[2] = B[(4+8*m)*lda+2+8*n];
- TempB[3] = B[(4+8*m)*lda+3+8*n];
- TempB[4] = B[(4+8*m)*lda+4+8*n];
- TempB[5] = B[(4+8*m)*lda+5+8*n];
- TempB[6] = B[(4+8*m)*lda+6+8*n];
- TempB[7] = B[(4+8*m)*lda+7+8*n];
-
- TempC[0] += TempA[4] * TempB[0];
- TempC[1] += TempA[4] * TempB[1];
- TempC[2] += TempA[4] * TempB[2];
- TempC[3] += TempA[4] * TempB[3];
- TempC[4] += TempA[4] * TempB[4];
- TempC[5] += TempA[4] * TempB[5];
- TempC[6] += TempA[4] * TempB[6];
- TempC[7] += TempA[4] * TempB[7];
-
-
-
- TempB[0] = B[(5+8*m)*lda+0+8*n];
- TempB[1] = B[(5+8*m)*lda+1+8*n];
- TempB[2] = B[(5+8*m)*lda+2+8*n];
- TempB[3] = B[(5+8*m)*lda+3+8*n];
- TempB[4] = B[(5+8*m)*lda+4+8*n];
- TempB[5] = B[(5+8*m)*lda+5+8*n];
- TempB[6] = B[(5+8*m)*lda+6+8*n];
- TempB[7] = B[(5+8*m)*lda+7+8*n];
-
- TempC[0] += TempA[5] * TempB[0];
- TempC[1] += TempA[5] * TempB[1];
- TempC[2] += TempA[5] * TempB[2];
- TempC[3] += TempA[5] * TempB[3];
- TempC[4] += TempA[5] * TempB[4];
- TempC[5] += TempA[5] * TempB[5];
- TempC[6] += TempA[5] * TempB[6];
- TempC[7] += TempA[5] * TempB[7];
-
-
-
- TempB[0] = B[(6+8*m)*lda+0+8*n];
- TempB[1] = B[(6+8*m)*lda+1+8*n];
- TempB[2] = B[(6+8*m)*lda+2+8*n];
- TempB[3] = B[(6+8*m)*lda+3+8*n];
- TempB[4] = B[(6+8*m)*lda+4+8*n];
- TempB[5] = B[(6+8*m)*lda+5+8*n];
- TempB[6] = B[(6+8*m)*lda+6+8*n];
- TempB[7] = B[(6+8*m)*lda+7+8*n];
-
- TempC[0] += TempA[6] * TempB[0];
- TempC[1] += TempA[6] * TempB[1];
- TempC[2] += TempA[6] * TempB[2];
- TempC[3] += TempA[6] * TempB[3];
- TempC[4] += TempA[6] * TempB[4];
- TempC[5] += TempA[6] * TempB[5];
- TempC[6] += TempA[6] * TempB[6];
- TempC[7] += TempA[6] * TempB[7];
-
-
- TempB[0] = B[(7+8*m)*lda+0+8*n];
- TempB[1] = B[(7+8*m)*lda+1+8*n];
- TempB[2] = B[(7+8*m)*lda+2+8*n];
- TempB[3] = B[(7+8*m)*lda+3+8*n];
- TempB[4] = B[(7+8*m)*lda+4+8*n];
- TempB[5] = B[(7+8*m)*lda+5+8*n];
- TempB[6] = B[(7+8*m)*lda+6+8*n];
- TempB[7] = B[(7+8*m)*lda+7+8*n];
-
- TempC[0] += TempA[7] * TempB[0];
- TempC[1] += TempA[7] * TempB[1];
- TempC[2] += TempA[7] * TempB[2];
- TempC[3] += TempA[7] * TempB[3];
- TempC[4] += TempA[7] * TempB[4];
- TempC[5] += TempA[7] * TempB[5];
- TempC[6] += TempA[7] * TempB[6];
- TempC[7] += TempA[7] * TempB[7];
-
-
-
- C[0+8*n+j*lda] += TempC[0];
- C[1+8*n+j*lda] += TempC[1];
- C[2+8*n+j*lda] += TempC[2];
- C[3+8*n+j*lda] += TempC[3];
- C[4+8*n+j*lda] += TempC[4];
- C[5+8*n+j*lda] += TempC[5];
- C[6+8*n+j*lda] += TempC[6];
- C[7+8*n+j*lda] += TempC[7];
- }
- }
- }
- }
- if(coreid == 0)
- {
- for ( j = 0; j < 16; j++ )
- {
-
- for ( m = 0; m < 4; m++ )
- {
-
- TempA[0] = A[j*lda+0+8*m];
- TempA[1] = A[j*lda+1+8*m];
- TempA[2] = A[j*lda+2+8*m];
- TempA[3] = A[j*lda+3+8*m];
- TempA[4] = A[j*lda+4+8*m];
- TempA[5] = A[j*lda+5+8*m];
- TempA[6] = A[j*lda+6+8*m];
- TempA[7] = A[j*lda+7+8*m];
-
- for( n = 0; n < 4; n++)
- {
-
-
- TempB[0] = B[(0+8*m)*lda+0+8*n];
- TempB[1] = B[(0+8*m)*lda+1+8*n];
- TempB[2] = B[(0+8*m)*lda+2+8*n];
- TempB[3] = B[(0+8*m)*lda+3+8*n];
- TempB[4] = B[(0+8*m)*lda+4+8*n];
- TempB[5] = B[(0+8*m)*lda+5+8*n];
- TempB[6] = B[(0+8*m)*lda+6+8*n];
- TempB[7] = B[(0+8*m)*lda+7+8*n];
-
-
- TempC[0] = TempA[0] * TempB[0];
- TempC[1] = TempA[0] * TempB[1];
- TempC[2] = TempA[0] * TempB[2];
- TempC[3] = TempA[0] * TempB[3];
- TempC[4] = TempA[0] * TempB[4];
- TempC[5] = TempA[0] * TempB[5];
- TempC[6] = TempA[0] * TempB[6];
- TempC[7] = TempA[0] * TempB[7];
-
-
-
- TempB[0] = B[(1+8*m)*lda+0+8*n];
- TempB[1] = B[(1+8*m)*lda+1+8*n];
- TempB[2] = B[(1+8*m)*lda+2+8*n];
- TempB[3] = B[(1+8*m)*lda+3+8*n];
- TempB[4] = B[(1+8*m)*lda+4+8*n];
- TempB[5] = B[(1+8*m)*lda+5+8*n];
- TempB[6] = B[(1+8*m)*lda+6+8*n];
- TempB[7] = B[(1+8*m)*lda+7+8*n];
-
- TempC[0] += TempA[1] * TempB[0];
- TempC[1] += TempA[1] * TempB[1];
- TempC[2] += TempA[1] * TempB[2];
- TempC[3] += TempA[1] * TempB[3];
- TempC[4] += TempA[1] * TempB[4];
- TempC[5] += TempA[1] * TempB[5];
- TempC[6] += TempA[1] * TempB[6];
- TempC[7] += TempA[1] * TempB[7];
-
-
-
- TempB[0] = B[(2+8*m)*lda+0+8*n];
- TempB[1] = B[(2+8*m)*lda+1+8*n];
- TempB[2] = B[(2+8*m)*lda+2+8*n];
- TempB[3] = B[(2+8*m)*lda+3+8*n];
- TempB[4] = B[(2+8*m)*lda+4+8*n];
- TempB[5] = B[(2+8*m)*lda+5+8*n];
- TempB[6] = B[(2+8*m)*lda+6+8*n];
- TempB[7] = B[(2+8*m)*lda+7+8*n];
-
- TempC[0] += TempA[2] * TempB[0];
- TempC[1] += TempA[2] * TempB[1];
- TempC[2] += TempA[2] * TempB[2];
- TempC[3] += TempA[2] * TempB[3];
- TempC[4] += TempA[2] * TempB[4];
- TempC[5] += TempA[2] * TempB[5];
- TempC[6] += TempA[2] * TempB[6];
- TempC[7] += TempA[2] * TempB[7];
-
-
-
- TempB[0] = B[(3+8*m)*lda+0+8*n];
- TempB[1] = B[(3+8*m)*lda+1+8*n];
- TempB[2] = B[(3+8*m)*lda+2+8*n];
- TempB[3] = B[(3+8*m)*lda+3+8*n];
- TempB[4] = B[(3+8*m)*lda+4+8*n];
- TempB[5] = B[(3+8*m)*lda+5+8*n];
- TempB[6] = B[(3+8*m)*lda+6+8*n];
- TempB[7] = B[(3+8*m)*lda+7+8*n];
-
- TempC[0] += TempA[3] * TempB[0];
- TempC[1] += TempA[3] * TempB[1];
- TempC[2] += TempA[3] * TempB[2];
- TempC[3] += TempA[3] * TempB[3];
- TempC[4] += TempA[3] * TempB[4];
- TempC[5] += TempA[3] * TempB[5];
- TempC[6] += TempA[3] * TempB[6];
- TempC[7] += TempA[3] * TempB[7];
-
-
- TempB[0] = B[(4+8*m)*lda+0+8*n];
- TempB[1] = B[(4+8*m)*lda+1+8*n];
- TempB[2] = B[(4+8*m)*lda+2+8*n];
- TempB[3] = B[(4+8*m)*lda+3+8*n];
- TempB[4] = B[(4+8*m)*lda+4+8*n];
- TempB[5] = B[(4+8*m)*lda+5+8*n];
- TempB[6] = B[(4+8*m)*lda+6+8*n];
- TempB[7] = B[(4+8*m)*lda+7+8*n];
-
- TempC[0] += TempA[4] * TempB[0];
- TempC[1] += TempA[4] * TempB[1];
- TempC[2] += TempA[4] * TempB[2];
- TempC[3] += TempA[4] * TempB[3];
- TempC[4] += TempA[4] * TempB[4];
- TempC[5] += TempA[4] * TempB[5];
- TempC[6] += TempA[4] * TempB[6];
- TempC[7] += TempA[4] * TempB[7];
-
-
-
- TempB[0] = B[(5+8*m)*lda+0+8*n];
- TempB[1] = B[(5+8*m)*lda+1+8*n];
- TempB[2] = B[(5+8*m)*lda+2+8*n];
- TempB[3] = B[(5+8*m)*lda+3+8*n];
- TempB[4] = B[(5+8*m)*lda+4+8*n];
- TempB[5] = B[(5+8*m)*lda+5+8*n];
- TempB[6] = B[(5+8*m)*lda+6+8*n];
- TempB[7] = B[(5+8*m)*lda+7+8*n];
-
- TempC[0] += TempA[5] * TempB[0];
- TempC[1] += TempA[5] * TempB[1];
- TempC[2] += TempA[5] * TempB[2];
- TempC[3] += TempA[5] * TempB[3];
- TempC[4] += TempA[5] * TempB[4];
- TempC[5] += TempA[5] * TempB[5];
- TempC[6] += TempA[5] * TempB[6];
- TempC[7] += TempA[5] * TempB[7];
-
-
-
- TempB[0] = B[(6+8*m)*lda+0+8*n];
- TempB[1] = B[(6+8*m)*lda+1+8*n];
- TempB[2] = B[(6+8*m)*lda+2+8*n];
- TempB[3] = B[(6+8*m)*lda+3+8*n];
- TempB[4] = B[(6+8*m)*lda+4+8*n];
- TempB[5] = B[(6+8*m)*lda+5+8*n];
- TempB[6] = B[(6+8*m)*lda+6+8*n];
- TempB[7] = B[(6+8*m)*lda+7+8*n];
-
- TempC[0] += TempA[6] * TempB[0];
- TempC[1] += TempA[6] * TempB[1];
- TempC[2] += TempA[6] * TempB[2];
- TempC[3] += TempA[6] * TempB[3];
- TempC[4] += TempA[6] * TempB[4];
- TempC[5] += TempA[6] * TempB[5];
- TempC[6] += TempA[6] * TempB[6];
- TempC[7] += TempA[6] * TempB[7];
-
-
- TempB[0] = B[(7+8*m)*lda+0+8*n];
- TempB[1] = B[(7+8*m)*lda+1+8*n];
- TempB[2] = B[(7+8*m)*lda+2+8*n];
- TempB[3] = B[(7+8*m)*lda+3+8*n];
- TempB[4] = B[(7+8*m)*lda+4+8*n];
- TempB[5] = B[(7+8*m)*lda+5+8*n];
- TempB[6] = B[(7+8*m)*lda+6+8*n];
- TempB[7] = B[(7+8*m)*lda+7+8*n];
-
- TempC[0] += TempA[7] * TempB[0];
- TempC[1] += TempA[7] * TempB[1];
- TempC[2] += TempA[7] * TempB[2];
- TempC[3] += TempA[7] * TempB[3];
- TempC[4] += TempA[7] * TempB[4];
- TempC[5] += TempA[7] * TempB[5];
- TempC[6] += TempA[7] * TempB[6];
- TempC[7] += TempA[7] * TempB[7];
-
- C[0+8*n+j*lda] += TempC[0];
- C[1+8*n+j*lda] += TempC[1];
- C[2+8*n+j*lda] += TempC[2];
- C[3+8*n+j*lda] += TempC[3];
- C[4+8*n+j*lda] += TempC[4];
- C[5+8*n+j*lda] += TempC[5];
- C[6+8*n+j*lda] += TempC[6];
- C[7+8*n+j*lda] += TempC[7];
- }
-
- }
- }
- }
- */
- //-----------------------------------------------------------------version 2.16, optimize v2.15 get rid of tempb. MSI 83K.w/ test one 81K.
-
-
- static __thread data_t TempA[8];
- static __thread data_t TempB[8];
- static __thread data_t TempC[8];
- static __thread int j,m,n;
-
- if(coreid == 1)
- {
- for ( j = 16; j < 32; j++ )
- {
-
- for ( m = 0; m < 4; m++ )
- {
-
- TempA[0] = A[j*lda+0+8*m];
- TempA[1] = A[j*lda+1+8*m];
- TempA[2] = A[j*lda+2+8*m];
- TempA[3] = A[j*lda+3+8*m];
- TempA[4] = A[j*lda+4+8*m];
- TempA[5] = A[j*lda+5+8*m];
- TempA[6] = A[j*lda+6+8*m];
- TempA[7] = A[j*lda+7+8*m];
-
-
-
- for( n = 0; n < 4; n++)
- {
-
-
-
-
-
- TempC[0] = TempA[0] * B[(0+8*m)*lda+0+8*n];
- TempC[1] = TempA[0] * B[(0+8*m)*lda+1+8*n];
- TempC[2] = TempA[0] * B[(0+8*m)*lda+2+8*n];
- TempC[3] = TempA[0] * B[(0+8*m)*lda+3+8*n];
- TempC[4] = TempA[0] * B[(0+8*m)*lda+4+8*n];
- TempC[5] = TempA[0] * B[(0+8*m)*lda+5+8*n];
- TempC[6] = TempA[0] * B[(0+8*m)*lda+6+8*n];
- TempC[7] = TempA[0] * B[(0+8*m)*lda+7+8*n];
-
-
- TempC[0] += TempA[1] * B[(1+8*m)*lda+0+8*n];
- TempC[1] += TempA[1] * B[(1+8*m)*lda+1+8*n];
- TempC[2] += TempA[1] * B[(1+8*m)*lda+2+8*n];
- TempC[3] += TempA[1] * B[(1+8*m)*lda+3+8*n];
- TempC[4] += TempA[1] * B[(1+8*m)*lda+4+8*n];
- TempC[5] += TempA[1] * B[(1+8*m)*lda+5+8*n];
- TempC[6] += TempA[1] * B[(1+8*m)*lda+6+8*n];
- TempC[7] += TempA[1] * B[(1+8*m)*lda+7+8*n];
-
-
-
- TempC[0] += TempA[2] * B[(2+8*m)*lda+0+8*n];
- TempC[1] += TempA[2] * B[(2+8*m)*lda+1+8*n];
- TempC[2] += TempA[2] * B[(2+8*m)*lda+2+8*n];
- TempC[3] += TempA[2] * B[(2+8*m)*lda+3+8*n];
- TempC[4] += TempA[2] * B[(2+8*m)*lda+4+8*n];
- TempC[5] += TempA[2] * B[(2+8*m)*lda+5+8*n];
- TempC[6] += TempA[2] * B[(2+8*m)*lda+6+8*n];
- TempC[7] += TempA[2] * B[(2+8*m)*lda+7+8*n];
-
-
-
- TempC[0] += TempA[3] * B[(3+8*m)*lda+0+8*n];
- TempC[1] += TempA[3] * B[(3+8*m)*lda+1+8*n];
- TempC[2] += TempA[3] * B[(3+8*m)*lda+2+8*n];
- TempC[3] += TempA[3] * B[(3+8*m)*lda+3+8*n];
- TempC[4] += TempA[3] * B[(3+8*m)*lda+4+8*n];
- TempC[5] += TempA[3] * B[(3+8*m)*lda+5+8*n];
- TempC[6] += TempA[3] * B[(3+8*m)*lda+6+8*n];
- TempC[7] += TempA[3] * B[(3+8*m)*lda+7+8*n];
-
- TempC[0] += TempA[4] * B[(4+8*m)*lda+0+8*n];
- TempC[1] += TempA[4] * B[(4+8*m)*lda+1+8*n];
- TempC[2] += TempA[4] * B[(4+8*m)*lda+2+8*n];
- TempC[3] += TempA[4] * B[(4+8*m)*lda+3+8*n];
- TempC[4] += TempA[4] * B[(4+8*m)*lda+4+8*n];
- TempC[5] += TempA[4] * B[(4+8*m)*lda+5+8*n];
- TempC[6] += TempA[4] * B[(4+8*m)*lda+6+8*n];
- TempC[7] += TempA[4] * B[(4+8*m)*lda+7+8*n];
-
-
- TempC[0] += TempA[5] * B[(5+8*m)*lda+0+8*n];
- TempC[1] += TempA[5] * B[(5+8*m)*lda+1+8*n];
- TempC[2] += TempA[5] * B[(5+8*m)*lda+2+8*n];
- TempC[3] += TempA[5] * B[(5+8*m)*lda+3+8*n];
- TempC[4] += TempA[5] * B[(5+8*m)*lda+4+8*n];
- TempC[5] += TempA[5] * B[(5+8*m)*lda+5+8*n];
- TempC[6] += TempA[5] * B[(5+8*m)*lda+6+8*n];
- TempC[7] += TempA[5] * B[(5+8*m)*lda+7+8*n];
-
-
-
- TempC[0] += TempA[6] * B[(6+8*m)*lda+0+8*n];
- TempC[1] += TempA[6] * B[(6+8*m)*lda+1+8*n];
- TempC[2] += TempA[6] * B[(6+8*m)*lda+2+8*n];
- TempC[3] += TempA[6] * B[(6+8*m)*lda+3+8*n];
- TempC[4] += TempA[6] * B[(6+8*m)*lda+4+8*n];
- TempC[5] += TempA[6] * B[(6+8*m)*lda+5+8*n];
- TempC[6] += TempA[6] * B[(6+8*m)*lda+6+8*n];
- TempC[7] += TempA[6] * B[(6+8*m)*lda+7+8*n];
-
-
- TempC[0] += TempA[7] * B[(7+8*m)*lda+0+8*n];
- TempC[1] += TempA[7] * B[(7+8*m)*lda+1+8*n];
- TempC[2] += TempA[7] * B[(7+8*m)*lda+2+8*n];
- TempC[3] += TempA[7] * B[(7+8*m)*lda+3+8*n];
- TempC[4] += TempA[7] * B[(7+8*m)*lda+4+8*n];
- TempC[5] += TempA[7] * B[(7+8*m)*lda+5+8*n];
- TempC[6] += TempA[7] * B[(7+8*m)*lda+6+8*n];
- TempC[7] += TempA[7] * B[(7+8*m)*lda+7+8*n];
-
-
-
- C[0+8*n+j*lda] += TempC[0];
- C[1+8*n+j*lda] += TempC[1];
- C[2+8*n+j*lda] += TempC[2];
- C[3+8*n+j*lda] += TempC[3];
- C[4+8*n+j*lda] += TempC[4];
- C[5+8*n+j*lda] += TempC[5];
- C[6+8*n+j*lda] += TempC[6];
- C[7+8*n+j*lda] += TempC[7];
- }
- }
- }
- }
- if(coreid == 0)
- {
- for ( j = 0; j < 16; j++ )
- {
-
- for ( m = 0; m < 4; m++ )
- {
-
- TempA[0] = A[j*lda+0+8*m];
- TempA[1] = A[j*lda+1+8*m];
- TempA[2] = A[j*lda+2+8*m];
- TempA[3] = A[j*lda+3+8*m];
- TempA[4] = A[j*lda+4+8*m];
- TempA[5] = A[j*lda+5+8*m];
- TempA[6] = A[j*lda+6+8*m];
- TempA[7] = A[j*lda+7+8*m];
-
-
-
- for( n = 0; n < 4; n++)
- {
-
-
-
-
-
- TempC[0] = TempA[0] * B[(0+8*m)*lda+0+8*n];
- TempC[1] = TempA[0] * B[(0+8*m)*lda+1+8*n];
- TempC[2] = TempA[0] * B[(0+8*m)*lda+2+8*n];
- TempC[3] = TempA[0] * B[(0+8*m)*lda+3+8*n];
- TempC[4] = TempA[0] * B[(0+8*m)*lda+4+8*n];
- TempC[5] = TempA[0] * B[(0+8*m)*lda+5+8*n];
- TempC[6] = TempA[0] * B[(0+8*m)*lda+6+8*n];
- TempC[7] = TempA[0] * B[(0+8*m)*lda+7+8*n];
-
-
- TempC[0] += TempA[1] * B[(1+8*m)*lda+0+8*n];
- TempC[1] += TempA[1] * B[(1+8*m)*lda+1+8*n];
- TempC[2] += TempA[1] * B[(1+8*m)*lda+2+8*n];
- TempC[3] += TempA[1] * B[(1+8*m)*lda+3+8*n];
- TempC[4] += TempA[1] * B[(1+8*m)*lda+4+8*n];
- TempC[5] += TempA[1] * B[(1+8*m)*lda+5+8*n];
- TempC[6] += TempA[1] * B[(1+8*m)*lda+6+8*n];
- TempC[7] += TempA[1] * B[(1+8*m)*lda+7+8*n];
-
-
-
- TempC[0] += TempA[2] * B[(2+8*m)*lda+0+8*n];
- TempC[1] += TempA[2] * B[(2+8*m)*lda+1+8*n];
- TempC[2] += TempA[2] * B[(2+8*m)*lda+2+8*n];
- TempC[3] += TempA[2] * B[(2+8*m)*lda+3+8*n];
- TempC[4] += TempA[2] * B[(2+8*m)*lda+4+8*n];
- TempC[5] += TempA[2] * B[(2+8*m)*lda+5+8*n];
- TempC[6] += TempA[2] * B[(2+8*m)*lda+6+8*n];
- TempC[7] += TempA[2] * B[(2+8*m)*lda+7+8*n];
-
-
-
- TempC[0] += TempA[3] * B[(3+8*m)*lda+0+8*n];
- TempC[1] += TempA[3] * B[(3+8*m)*lda+1+8*n];
- TempC[2] += TempA[3] * B[(3+8*m)*lda+2+8*n];
- TempC[3] += TempA[3] * B[(3+8*m)*lda+3+8*n];
- TempC[4] += TempA[3] * B[(3+8*m)*lda+4+8*n];
- TempC[5] += TempA[3] * B[(3+8*m)*lda+5+8*n];
- TempC[6] += TempA[3] * B[(3+8*m)*lda+6+8*n];
- TempC[7] += TempA[3] * B[(3+8*m)*lda+7+8*n];
-
- TempC[0] += TempA[4] * B[(4+8*m)*lda+0+8*n];
- TempC[1] += TempA[4] * B[(4+8*m)*lda+1+8*n];
- TempC[2] += TempA[4] * B[(4+8*m)*lda+2+8*n];
- TempC[3] += TempA[4] * B[(4+8*m)*lda+3+8*n];
- TempC[4] += TempA[4] * B[(4+8*m)*lda+4+8*n];
- TempC[5] += TempA[4] * B[(4+8*m)*lda+5+8*n];
- TempC[6] += TempA[4] * B[(4+8*m)*lda+6+8*n];
- TempC[7] += TempA[4] * B[(4+8*m)*lda+7+8*n];
-
-
- TempC[0] += TempA[5] * B[(5+8*m)*lda+0+8*n];
- TempC[1] += TempA[5] * B[(5+8*m)*lda+1+8*n];
- TempC[2] += TempA[5] * B[(5+8*m)*lda+2+8*n];
- TempC[3] += TempA[5] * B[(5+8*m)*lda+3+8*n];
- TempC[4] += TempA[5] * B[(5+8*m)*lda+4+8*n];
- TempC[5] += TempA[5] * B[(5+8*m)*lda+5+8*n];
- TempC[6] += TempA[5] * B[(5+8*m)*lda+6+8*n];
- TempC[7] += TempA[5] * B[(5+8*m)*lda+7+8*n];
-
-
-
- TempC[0] += TempA[6] * B[(6+8*m)*lda+0+8*n];
- TempC[1] += TempA[6] * B[(6+8*m)*lda+1+8*n];
- TempC[2] += TempA[6] * B[(6+8*m)*lda+2+8*n];
- TempC[3] += TempA[6] * B[(6+8*m)*lda+3+8*n];
- TempC[4] += TempA[6] * B[(6+8*m)*lda+4+8*n];
- TempC[5] += TempA[6] * B[(6+8*m)*lda+5+8*n];
- TempC[6] += TempA[6] * B[(6+8*m)*lda+6+8*n];
- TempC[7] += TempA[6] * B[(6+8*m)*lda+7+8*n];
-
-
- TempC[0] += TempA[7] * B[(7+8*m)*lda+0+8*n];
- TempC[1] += TempA[7] * B[(7+8*m)*lda+1+8*n];
- TempC[2] += TempA[7] * B[(7+8*m)*lda+2+8*n];
- TempC[3] += TempA[7] * B[(7+8*m)*lda+3+8*n];
- TempC[4] += TempA[7] * B[(7+8*m)*lda+4+8*n];
- TempC[5] += TempA[7] * B[(7+8*m)*lda+5+8*n];
- TempC[6] += TempA[7] * B[(7+8*m)*lda+6+8*n];
- TempC[7] += TempA[7] * B[(7+8*m)*lda+7+8*n];
-
-
-
- C[0+8*n+j*lda] += TempC[0];
- C[1+8*n+j*lda] += TempC[1];
- C[2+8*n+j*lda] += TempC[2];
- C[3+8*n+j*lda] += TempC[3];
- C[4+8*n+j*lda] += TempC[4];
- C[5+8*n+j*lda] += TempC[5];
- C[6+8*n+j*lda] += TempC[6];
- C[7+8*n+j*lda] += TempC[7];
- }
- }
- }
- }
-
- //-----------------------------------------------------------------version 2.13 optimize j
- /*
- static __thread data_t TempA[8];
- static __thread data_t TempB[8];
- static __thread data_t TempC[8];
- static __thread int j,m,n,i,k;
-
- if(coreid == 1)
- {
- for ( j = 16; j < 32; j++ )
- {
-
- for ( m = 0; m < 4; m++ )
- {
-
- TempA[0] = A[j*lda+0+8*m];
- TempA[1] = A[j*lda+1+8*m];
- TempA[2] = A[j*lda+2+8*m];
- TempA[3] = A[j*lda+3+8*m];
- TempA[4] = A[j*lda+4+8*m];
- TempA[5] = A[j*lda+5+8*m];
- TempA[6] = A[j*lda+6+8*m];
- TempA[7] = A[j*lda+7+8*m];
-
- for( n = 0; n < 4; n++)
- {
- TempB[0] = B[(0+8*m)*lda+0+8*n];
- TempB[1] = B[(0+8*m)*lda+1+8*n];
- TempB[2] = B[(0+8*m)*lda+2+8*n];
- TempB[3] = B[(0+8*m)*lda+3+8*n];
- TempB[4] = B[(0+8*m)*lda+4+8*n];
- TempB[5] = B[(0+8*m)*lda+5+8*n];
- TempB[6] = B[(0+8*m)*lda+6+8*n];
- TempB[7] = B[(0+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[0] * TempB[0];
- C[1+8*n+j*lda] += TempA[0] * TempB[1];
- C[2+8*n+j*lda] += TempA[0] * TempB[2];
- C[3+8*n+j*lda] += TempA[0] * TempB[3];
- C[4+8*n+j*lda] += TempA[0] * TempB[4];
- C[5+8*n+j*lda] += TempA[0] * TempB[5];
- C[6+8*n+j*lda] += TempA[0] * TempB[6];
- C[7+8*n+j*lda] += TempA[0] * TempB[7];
-
-
-
- TempB[0] = B[(1+8*m)*lda+0+8*n];
- TempB[1] = B[(1+8*m)*lda+1+8*n];
- TempB[2] = B[(1+8*m)*lda+2+8*n];
- TempB[3] = B[(1+8*m)*lda+3+8*n];
- TempB[4] = B[(1+8*m)*lda+4+8*n];
- TempB[5] = B[(1+8*m)*lda+5+8*n];
- TempB[6] = B[(1+8*m)*lda+6+8*n];
- TempB[7] = B[(1+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[1] * TempB[0];
- C[1+8*n+j*lda] += TempA[1] * TempB[1];
- C[2+8*n+j*lda] += TempA[1] * TempB[2];
- C[3+8*n+j*lda] += TempA[1] * TempB[3];
- C[4+8*n+j*lda] += TempA[1] * TempB[4];
- C[5+8*n+j*lda] += TempA[1] * TempB[5];
- C[6+8*n+j*lda] += TempA[1] * TempB[6];
- C[7+8*n+j*lda] += TempA[1] * TempB[7];
-
-
-
- TempB[0] = B[(2+8*m)*lda+0+8*n];
- TempB[1] = B[(2+8*m)*lda+1+8*n];
- TempB[2] = B[(2+8*m)*lda+2+8*n];
- TempB[3] = B[(2+8*m)*lda+3+8*n];
- TempB[4] = B[(2+8*m)*lda+4+8*n];
- TempB[5] = B[(2+8*m)*lda+5+8*n];
- TempB[6] = B[(2+8*m)*lda+6+8*n];
- TempB[7] = B[(2+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[2] * TempB[0];
- C[1+8*n+j*lda] += TempA[2] * TempB[1];
- C[2+8*n+j*lda] += TempA[2] * TempB[2];
- C[3+8*n+j*lda] += TempA[2] * TempB[3];
- C[4+8*n+j*lda] += TempA[2] * TempB[4];
- C[5+8*n+j*lda] += TempA[2] * TempB[5];
- C[6+8*n+j*lda] += TempA[2] * TempB[6];
- C[7+8*n+j*lda] += TempA[2] * TempB[7];
-
-
-
- TempB[0] = B[(3+8*m)*lda+0+8*n];
- TempB[1] = B[(3+8*m)*lda+1+8*n];
- TempB[2] = B[(3+8*m)*lda+2+8*n];
- TempB[3] = B[(3+8*m)*lda+3+8*n];
- TempB[4] = B[(3+8*m)*lda+4+8*n];
- TempB[5] = B[(3+8*m)*lda+5+8*n];
- TempB[6] = B[(3+8*m)*lda+6+8*n];
- TempB[7] = B[(3+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[3] * TempB[0];
- C[1+8*n+j*lda] += TempA[3] * TempB[1];
- C[2+8*n+j*lda] += TempA[3] * TempB[2];
- C[3+8*n+j*lda] += TempA[3] * TempB[3];
- C[4+8*n+j*lda] += TempA[3] * TempB[4];
- C[5+8*n+j*lda] += TempA[3] * TempB[5];
- C[6+8*n+j*lda] += TempA[3] * TempB[6];
- C[7+8*n+j*lda] += TempA[3] * TempB[7];
-
-
- TempB[0] = B[(4+8*m)*lda+0+8*n];
- TempB[1] = B[(4+8*m)*lda+1+8*n];
- TempB[2] = B[(4+8*m)*lda+2+8*n];
- TempB[3] = B[(4+8*m)*lda+3+8*n];
- TempB[4] = B[(4+8*m)*lda+4+8*n];
- TempB[5] = B[(4+8*m)*lda+5+8*n];
- TempB[6] = B[(4+8*m)*lda+6+8*n];
- TempB[7] = B[(4+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[4] * TempB[0];
- C[1+8*n+j*lda] += TempA[4] * TempB[1];
- C[2+8*n+j*lda] += TempA[4] * TempB[2];
- C[3+8*n+j*lda] += TempA[4] * TempB[3];
- C[4+8*n+j*lda] += TempA[4] * TempB[4];
- C[5+8*n+j*lda] += TempA[4] * TempB[5];
- C[6+8*n+j*lda] += TempA[4] * TempB[6];
- C[7+8*n+j*lda] += TempA[4] * TempB[7];
-
-
-
- TempB[0] = B[(5+8*m)*lda+0+8*n];
- TempB[1] = B[(5+8*m)*lda+1+8*n];
- TempB[2] = B[(5+8*m)*lda+2+8*n];
- TempB[3] = B[(5+8*m)*lda+3+8*n];
- TempB[4] = B[(5+8*m)*lda+4+8*n];
- TempB[5] = B[(5+8*m)*lda+5+8*n];
- TempB[6] = B[(5+8*m)*lda+6+8*n];
- TempB[7] = B[(5+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[5] * TempB[0];
- C[1+8*n+j*lda] += TempA[5] * TempB[1];
- C[2+8*n+j*lda] += TempA[5] * TempB[2];
- C[3+8*n+j*lda] += TempA[5] * TempB[3];
- C[4+8*n+j*lda] += TempA[5] * TempB[4];
- C[5+8*n+j*lda] += TempA[5] * TempB[5];
- C[6+8*n+j*lda] += TempA[5] * TempB[6];
- C[7+8*n+j*lda] += TempA[5] * TempB[7];
-
-
-
- TempB[0] = B[(6+8*m)*lda+0+8*n];
- TempB[1] = B[(6+8*m)*lda+1+8*n];
- TempB[2] = B[(6+8*m)*lda+2+8*n];
- TempB[3] = B[(6+8*m)*lda+3+8*n];
- TempB[4] = B[(6+8*m)*lda+4+8*n];
- TempB[5] = B[(6+8*m)*lda+5+8*n];
- TempB[6] = B[(6+8*m)*lda+6+8*n];
- TempB[7] = B[(6+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[6] * TempB[0];
- C[1+8*n+j*lda] += TempA[6] * TempB[1];
- C[2+8*n+j*lda] += TempA[6] * TempB[2];
- C[3+8*n+j*lda] += TempA[6] * TempB[3];
- C[4+8*n+j*lda] += TempA[6] * TempB[4];
- C[5+8*n+j*lda] += TempA[6] * TempB[5];
- C[6+8*n+j*lda] += TempA[6] * TempB[6];
- C[7+8*n+j*lda] += TempA[6] * TempB[7];
-
-
- TempB[0] = B[(7+8*m)*lda+0+8*n];
- TempB[1] = B[(7+8*m)*lda+1+8*n];
- TempB[2] = B[(7+8*m)*lda+2+8*n];
- TempB[3] = B[(7+8*m)*lda+3+8*n];
- TempB[4] = B[(7+8*m)*lda+4+8*n];
- TempB[5] = B[(7+8*m)*lda+5+8*n];
- TempB[6] = B[(7+8*m)*lda+6+8*n];
- TempB[7] = B[(7+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[7] * TempB[0];
- C[1+8*n+j*lda] += TempA[7] * TempB[1];
- C[2+8*n+j*lda] += TempA[7] * TempB[2];
- C[3+8*n+j*lda] += TempA[7] * TempB[3];
- C[4+8*n+j*lda] += TempA[7] * TempB[4];
- C[5+8*n+j*lda] += TempA[7] * TempB[5];
- C[6+8*n+j*lda] += TempA[7] * TempB[6];
- C[7+8*n+j*lda] += TempA[7] * TempB[7];
- }
-
- }
- }
- }
- if(coreid == 0)
- {
- for ( j = 0; j < 16; j++ )
- {
-
- for ( m = 0; m < 4; m++ )
- {
-
- TempA[0] = A[j*lda+0+8*m];
- TempA[1] = A[j*lda+1+8*m];
- TempA[2] = A[j*lda+2+8*m];
- TempA[3] = A[j*lda+3+8*m];
- TempA[4] = A[j*lda+4+8*m];
- TempA[5] = A[j*lda+5+8*m];
- TempA[6] = A[j*lda+6+8*m];
- TempA[7] = A[j*lda+7+8*m];
-
- for( n = 0; n < 4; n++)
- {
- TempB[0] = B[(0+8*m)*lda+0+8*n];
- TempB[1] = B[(0+8*m)*lda+1+8*n];
- TempB[2] = B[(0+8*m)*lda+2+8*n];
- TempB[3] = B[(0+8*m)*lda+3+8*n];
- TempB[4] = B[(0+8*m)*lda+4+8*n];
- TempB[5] = B[(0+8*m)*lda+5+8*n];
- TempB[6] = B[(0+8*m)*lda+6+8*n];
- TempB[7] = B[(0+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[0] * TempB[0];
- C[1+8*n+j*lda] += TempA[0] * TempB[1];
- C[2+8*n+j*lda] += TempA[0] * TempB[2];
- C[3+8*n+j*lda] += TempA[0] * TempB[3];
- C[4+8*n+j*lda] += TempA[0] * TempB[4];
- C[5+8*n+j*lda] += TempA[0] * TempB[5];
- C[6+8*n+j*lda] += TempA[0] * TempB[6];
- C[7+8*n+j*lda] += TempA[0] * TempB[7];
-
-
-
- TempB[0] = B[(1+8*m)*lda+0+8*n];
- TempB[1] = B[(1+8*m)*lda+1+8*n];
- TempB[2] = B[(1+8*m)*lda+2+8*n];
- TempB[3] = B[(1+8*m)*lda+3+8*n];
- TempB[4] = B[(1+8*m)*lda+4+8*n];
- TempB[5] = B[(1+8*m)*lda+5+8*n];
- TempB[6] = B[(1+8*m)*lda+6+8*n];
- TempB[7] = B[(1+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[1] * TempB[0];
- C[1+8*n+j*lda] += TempA[1] * TempB[1];
- C[2+8*n+j*lda] += TempA[1] * TempB[2];
- C[3+8*n+j*lda] += TempA[1] * TempB[3];
- C[4+8*n+j*lda] += TempA[1] * TempB[4];
- C[5+8*n+j*lda] += TempA[1] * TempB[5];
- C[6+8*n+j*lda] += TempA[1] * TempB[6];
- C[7+8*n+j*lda] += TempA[1] * TempB[7];
-
-
-
- TempB[0] = B[(2+8*m)*lda+0+8*n];
- TempB[1] = B[(2+8*m)*lda+1+8*n];
- TempB[2] = B[(2+8*m)*lda+2+8*n];
- TempB[3] = B[(2+8*m)*lda+3+8*n];
- TempB[4] = B[(2+8*m)*lda+4+8*n];
- TempB[5] = B[(2+8*m)*lda+5+8*n];
- TempB[6] = B[(2+8*m)*lda+6+8*n];
- TempB[7] = B[(2+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[2] * TempB[0];
- C[1+8*n+j*lda] += TempA[2] * TempB[1];
- C[2+8*n+j*lda] += TempA[2] * TempB[2];
- C[3+8*n+j*lda] += TempA[2] * TempB[3];
- C[4+8*n+j*lda] += TempA[2] * TempB[4];
- C[5+8*n+j*lda] += TempA[2] * TempB[5];
- C[6+8*n+j*lda] += TempA[2] * TempB[6];
- C[7+8*n+j*lda] += TempA[2] * TempB[7];
-
-
-
- TempB[0] = B[(3+8*m)*lda+0+8*n];
- TempB[1] = B[(3+8*m)*lda+1+8*n];
- TempB[2] = B[(3+8*m)*lda+2+8*n];
- TempB[3] = B[(3+8*m)*lda+3+8*n];
- TempB[4] = B[(3+8*m)*lda+4+8*n];
- TempB[5] = B[(3+8*m)*lda+5+8*n];
- TempB[6] = B[(3+8*m)*lda+6+8*n];
- TempB[7] = B[(3+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[3] * TempB[0];
- C[1+8*n+j*lda] += TempA[3] * TempB[1];
- C[2+8*n+j*lda] += TempA[3] * TempB[2];
- C[3+8*n+j*lda] += TempA[3] * TempB[3];
- C[4+8*n+j*lda] += TempA[3] * TempB[4];
- C[5+8*n+j*lda] += TempA[3] * TempB[5];
- C[6+8*n+j*lda] += TempA[3] * TempB[6];
- C[7+8*n+j*lda] += TempA[3] * TempB[7];
-
-
- TempB[0] = B[(4+8*m)*lda+0+8*n];
- TempB[1] = B[(4+8*m)*lda+1+8*n];
- TempB[2] = B[(4+8*m)*lda+2+8*n];
- TempB[3] = B[(4+8*m)*lda+3+8*n];
- TempB[4] = B[(4+8*m)*lda+4+8*n];
- TempB[5] = B[(4+8*m)*lda+5+8*n];
- TempB[6] = B[(4+8*m)*lda+6+8*n];
- TempB[7] = B[(4+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[4] * TempB[0];
- C[1+8*n+j*lda] += TempA[4] * TempB[1];
- C[2+8*n+j*lda] += TempA[4] * TempB[2];
- C[3+8*n+j*lda] += TempA[4] * TempB[3];
- C[4+8*n+j*lda] += TempA[4] * TempB[4];
- C[5+8*n+j*lda] += TempA[4] * TempB[5];
- C[6+8*n+j*lda] += TempA[4] * TempB[6];
- C[7+8*n+j*lda] += TempA[4] * TempB[7];
-
-
-
- TempB[0] = B[(5+8*m)*lda+0+8*n];
- TempB[1] = B[(5+8*m)*lda+1+8*n];
- TempB[2] = B[(5+8*m)*lda+2+8*n];
- TempB[3] = B[(5+8*m)*lda+3+8*n];
- TempB[4] = B[(5+8*m)*lda+4+8*n];
- TempB[5] = B[(5+8*m)*lda+5+8*n];
- TempB[6] = B[(5+8*m)*lda+6+8*n];
- TempB[7] = B[(5+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[5] * TempB[0];
- C[1+8*n+j*lda] += TempA[5] * TempB[1];
- C[2+8*n+j*lda] += TempA[5] * TempB[2];
- C[3+8*n+j*lda] += TempA[5] * TempB[3];
- C[4+8*n+j*lda] += TempA[5] * TempB[4];
- C[5+8*n+j*lda] += TempA[5] * TempB[5];
- C[6+8*n+j*lda] += TempA[5] * TempB[6];
- C[7+8*n+j*lda] += TempA[5] * TempB[7];
-
-
-
- TempB[0] = B[(6+8*m)*lda+0+8*n];
- TempB[1] = B[(6+8*m)*lda+1+8*n];
- TempB[2] = B[(6+8*m)*lda+2+8*n];
- TempB[3] = B[(6+8*m)*lda+3+8*n];
- TempB[4] = B[(6+8*m)*lda+4+8*n];
- TempB[5] = B[(6+8*m)*lda+5+8*n];
- TempB[6] = B[(6+8*m)*lda+6+8*n];
- TempB[7] = B[(6+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[6] * TempB[0];
- C[1+8*n+j*lda] += TempA[6] * TempB[1];
- C[2+8*n+j*lda] += TempA[6] * TempB[2];
- C[3+8*n+j*lda] += TempA[6] * TempB[3];
- C[4+8*n+j*lda] += TempA[6] * TempB[4];
- C[5+8*n+j*lda] += TempA[6] * TempB[5];
- C[6+8*n+j*lda] += TempA[6] * TempB[6];
- C[7+8*n+j*lda] += TempA[6] * TempB[7];
-
-
- TempB[0] = B[(7+8*m)*lda+0+8*n];
- TempB[1] = B[(7+8*m)*lda+1+8*n];
- TempB[2] = B[(7+8*m)*lda+2+8*n];
- TempB[3] = B[(7+8*m)*lda+3+8*n];
- TempB[4] = B[(7+8*m)*lda+4+8*n];
- TempB[5] = B[(7+8*m)*lda+5+8*n];
- TempB[6] = B[(7+8*m)*lda+6+8*n];
- TempB[7] = B[(7+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[7] * TempB[0];
- C[1+8*n+j*lda] += TempA[7] * TempB[1];
- C[2+8*n+j*lda] += TempA[7] * TempB[2];
- C[3+8*n+j*lda] += TempA[7] * TempB[3];
- C[4+8*n+j*lda] += TempA[7] * TempB[4];
- C[5+8*n+j*lda] += TempA[7] * TempB[5];
- C[6+8*n+j*lda] += TempA[7] * TempB[6];
- C[7+8*n+j*lda] += TempA[7] * TempB[7];
- }
-
- }
- }
- }
- */
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
- //-------------------------------------------------------------first working version best 500k
- /*
- static __thread int i, j, k;
- if(coreid == 0)
- {
- for ( j = 0; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- for ( i = 0; i < lda; i++)
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
- }
- }
-
- if(coreid ==1)
- {
- for ( j = 1; j < lda; j+=2 )
- {
- for ( k = 0;k < lda; k++)
- {
- for ( i = 0; i < lda; i++)
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
-
- }
- }
- }
- }
- */
- //-------------------------------------------------------------version1.1, take read out of inner loop,300k
- /*
- static __thread int i, j, k;
- static __thread data_t TempA;
-
- if(coreid == 0)
- {
- for ( j = 0; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[j*lda + k];
- for ( i = 0; i < lda; i++)
- {
- C[i + j*lda] += TempA* B[k*lda + i];
- }
- }
- }
- }
-
- if(coreid ==1)
- {
- for ( j = 1; j < lda; j+=2 )
- {
- for ( k = 0;k < lda; k++)
- {
- TempA = A[j*lda + k];
- for ( i = 0; i < lda; i++)
- {
- C[i + j*lda] += TempA* B[k*lda + i];
- }
- }
- }
- }
- */
- //-------------------------------------------------------------version2.0, read 8 elements in B at one time. 140k mi, MSI117.0k
- /*
- static __thread int i, j, k, m, n;
- static __thread data_t TempA;
- static __thread data_t TempB[8];
-
- if(coreid == 0)
- {
- for ( j = 0; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[j*lda + k];
- for( n = 0; n < 4; n++)
- {
-
- TempB[0] = B[k*lda+0+8*n];
- TempB[1] = B[k*lda+1+8*n];
- TempB[2] = B[k*lda+2+8*n];
- TempB[3] = B[k*lda+3+8*n];
- TempB[4] = B[k*lda+4+8*n];
- TempB[5] = B[k*lda+5+8*n];
- TempB[6] = B[k*lda+6+8*n];
- TempB[7] = B[k*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA * TempB[0];
- C[1+8*n+j*lda] += TempA * TempB[1];
- C[2+8*n+j*lda] += TempA * TempB[2];
- C[3+8*n+j*lda] += TempA * TempB[3];
- C[4+8*n+j*lda] += TempA * TempB[4];
- C[5+8*n+j*lda] += TempA * TempB[5];
- C[6+8*n+j*lda] += TempA * TempB[6];
- C[7+8*n+j*lda] += TempA * TempB[7];
-
- }
-
- }
- }
- }
-
- if(coreid == 1)
- {
- for ( j = 1; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[j*lda + k];
- for( n = 0; n < 4; n++)
- {
-
- TempB[0] = B[k*lda+0+8*n];
- TempB[1] = B[k*lda+1+8*n];
- TempB[2] = B[k*lda+2+8*n];
- TempB[3] = B[k*lda+3+8*n];
- TempB[4] = B[k*lda+4+8*n];
- TempB[5] = B[k*lda+5+8*n];
- TempB[6] = B[k*lda+6+8*n];
- TempB[7] = B[k*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA * TempB[0];
- C[1+8*n+j*lda] += TempA * TempB[1];
- C[2+8*n+j*lda] += TempA * TempB[2];
- C[3+8*n+j*lda] += TempA * TempB[3];
- C[4+8*n+j*lda] += TempA * TempB[4];
- C[5+8*n+j*lda] += TempA * TempB[5];
- C[6+8*n+j*lda] += TempA * TempB[6];
- C[7+8*n+j*lda] += TempA * TempB[7];
-
- }
-
- }
- }
- }
- */
-
- //-------------------------------------------------------------version2.1, optimize k. 700k. bad move to v2.2.
- //-------------------------------------------------------------version2.9 take off all inner loops for both cores, MSI,109K. MI 182k
- //-------------------------------------------------------------version2.10 use i= j*lda inside the n loop increase speed. but not out m and n. tried replace first 3, get 104.9k
- /*
- static __thread int j, m, i,n;
- static __thread data_t TempA[8];
- static __thread data_t TempB[8];
-
- if(coreid == 1)
- {
- for ( j = 1; j < lda; j+=2 )
- {
-
- for ( m = 0; m < 4; m++ )
- {
-
- TempA[0] = A[j*lda+0+8*m];
- TempA[1] = A[j*lda+1+8*m];
- TempA[2] = A[j*lda+2+8*m];
- TempA[3] = A[j*lda+3+8*m];
- TempA[4] = A[j*lda+4+8*m];
- TempA[5] = A[j*lda+5+8*m];
- TempA[6] = A[j*lda+6+8*m];
- TempA[7] = A[j*lda+7+8*m];
-
- for( n = 0; n < 4; n++)
- {
- i = j*lda;
-
- TempB[0] = B[(0+8*m)*lda+0+8*n];
- TempB[1] = B[(0+8*m)*lda+1+8*n];
- TempB[2] = B[(0+8*m)*lda+2+8*n];
- TempB[3] = B[(0+8*m)*lda+3+8*n];
- TempB[4] = B[(0+8*m)*lda+4+8*n];
- TempB[5] = B[(0+8*m)*lda+5+8*n];
- TempB[6] = B[(0+8*m)*lda+6+8*n];
- TempB[7] = B[(0+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[0] * TempB[0];
- C[1+8*n+i] += TempA[0] * TempB[1];
- C[2+8*n+i] += TempA[0] * TempB[2];
- C[3+8*n+i] += TempA[0] * TempB[3];
- C[4+8*n+i] += TempA[0] * TempB[4];
- C[5+8*n+i] += TempA[0] * TempB[5];
- C[6+8*n+i] += TempA[0] * TempB[6];
- C[7+8*n+i] += TempA[0] * TempB[7];
-
-
-
- TempB[0] = B[(1+8*m)*lda+0+8*n];
- TempB[1] = B[(1+8*m)*lda+1+8*n];
- TempB[2] = B[(1+8*m)*lda+2+8*n];
- TempB[3] = B[(1+8*m)*lda+3+8*n];
- TempB[4] = B[(1+8*m)*lda+4+8*n];
- TempB[5] = B[(1+8*m)*lda+5+8*n];
- TempB[6] = B[(1+8*m)*lda+6+8*n];
- TempB[7] = B[(1+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[1] * TempB[0];
- C[1+8*n+i] += TempA[1] * TempB[1];
- C[2+8*n+i] += TempA[1] * TempB[2];
- C[3+8*n+i] += TempA[1] * TempB[3];
- C[4+8*n+i] += TempA[1] * TempB[4];
- C[5+8*n+i] += TempA[1] * TempB[5];
- C[6+8*n+i] += TempA[1] * TempB[6];
- C[7+8*n+i] += TempA[1] * TempB[7];
-
-
-
- TempB[0] = B[(2+8*m)*lda+0+8*n];
- TempB[1] = B[(2+8*m)*lda+1+8*n];
- TempB[2] = B[(2+8*m)*lda+2+8*n];
- TempB[3] = B[(2+8*m)*lda+3+8*n];
- TempB[4] = B[(2+8*m)*lda+4+8*n];
- TempB[5] = B[(2+8*m)*lda+5+8*n];
- TempB[6] = B[(2+8*m)*lda+6+8*n];
- TempB[7] = B[(2+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[2] * TempB[0];
- C[1+8*n+i] += TempA[2] * TempB[1];
- C[2+8*n+i] += TempA[2] * TempB[2];
- C[3+8*n+i] += TempA[2] * TempB[3];
- C[4+8*n+i] += TempA[2] * TempB[4];
- C[5+8*n+i] += TempA[2] * TempB[5];
- C[6+8*n+i] += TempA[2] * TempB[6];
- C[7+8*n+i] += TempA[2] * TempB[7];
-
-
-
- TempB[0] = B[(3+8*m)*lda+0+8*n];
- TempB[1] = B[(3+8*m)*lda+1+8*n];
- TempB[2] = B[(3+8*m)*lda+2+8*n];
- TempB[3] = B[(3+8*m)*lda+3+8*n];
- TempB[4] = B[(3+8*m)*lda+4+8*n];
- TempB[5] = B[(3+8*m)*lda+5+8*n];
- TempB[6] = B[(3+8*m)*lda+6+8*n];
- TempB[7] = B[(3+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[3] * TempB[0];
- C[1+8*n+i] += TempA[3] * TempB[1];
- C[2+8*n+i] += TempA[3] * TempB[2];
- C[3+8*n+i] += TempA[3] * TempB[3];
- C[4+8*n+i] += TempA[3] * TempB[4];
- C[5+8*n+i] += TempA[3] * TempB[5];
- C[6+8*n+i] += TempA[3] * TempB[6];
- C[7+8*n+i] += TempA[3] * TempB[7];
-
-
- TempB[0] = B[(4+8*m)*lda+0+8*n];
- TempB[1] = B[(4+8*m)*lda+1+8*n];
- TempB[2] = B[(4+8*m)*lda+2+8*n];
- TempB[3] = B[(4+8*m)*lda+3+8*n];
- TempB[4] = B[(4+8*m)*lda+4+8*n];
- TempB[5] = B[(4+8*m)*lda+5+8*n];
- TempB[6] = B[(4+8*m)*lda+6+8*n];
- TempB[7] = B[(4+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[4] * TempB[0];
- C[1+8*n+i] += TempA[4] * TempB[1];
- C[2+8*n+i] += TempA[4] * TempB[2];
- C[3+8*n+i] += TempA[4] * TempB[3];
- C[4+8*n+i] += TempA[4] * TempB[4];
- C[5+8*n+i] += TempA[4] * TempB[5];
- C[6+8*n+i] += TempA[4] * TempB[6];
- C[7+8*n+i] += TempA[4] * TempB[7];
-
-
-
- TempB[0] = B[(5+8*m)*lda+0+8*n];
- TempB[1] = B[(5+8*m)*lda+1+8*n];
- TempB[2] = B[(5+8*m)*lda+2+8*n];
- TempB[3] = B[(5+8*m)*lda+3+8*n];
- TempB[4] = B[(5+8*m)*lda+4+8*n];
- TempB[5] = B[(5+8*m)*lda+5+8*n];
- TempB[6] = B[(5+8*m)*lda+6+8*n];
- TempB[7] = B[(5+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[5] * TempB[0];
- C[1+8*n+i] += TempA[5] * TempB[1];
- C[2+8*n+i] += TempA[5] * TempB[2];
- C[3+8*n+i] += TempA[5] * TempB[3];
- C[4+8*n+i] += TempA[5] * TempB[4];
- C[5+8*n+i] += TempA[5] * TempB[5];
- C[6+8*n+i] += TempA[5] * TempB[6];
- C[7+8*n+i] += TempA[5] * TempB[7];
-
-
-
- TempB[0] = B[(6+8*m)*lda+0+8*n];
- TempB[1] = B[(6+8*m)*lda+1+8*n];
- TempB[2] = B[(6+8*m)*lda+2+8*n];
- TempB[3] = B[(6+8*m)*lda+3+8*n];
- TempB[4] = B[(6+8*m)*lda+4+8*n];
- TempB[5] = B[(6+8*m)*lda+5+8*n];
- TempB[6] = B[(6+8*m)*lda+6+8*n];
- TempB[7] = B[(6+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[6] * TempB[0];
- C[1+8*n+i] += TempA[6] * TempB[1];
- C[2+8*n+i] += TempA[6] * TempB[2];
- C[3+8*n+i] += TempA[6] * TempB[3];
- C[4+8*n+i] += TempA[6] * TempB[4];
- C[5+8*n+i] += TempA[6] * TempB[5];
- C[6+8*n+i] += TempA[6] * TempB[6];
- C[7+8*n+i] += TempA[6] * TempB[7];
-
-
- TempB[0] = B[(7+8*m)*lda+0+8*n];
- TempB[1] = B[(7+8*m)*lda+1+8*n];
- TempB[2] = B[(7+8*m)*lda+2+8*n];
- TempB[3] = B[(7+8*m)*lda+3+8*n];
- TempB[4] = B[(7+8*m)*lda+4+8*n];
- TempB[5] = B[(7+8*m)*lda+5+8*n];
- TempB[6] = B[(7+8*m)*lda+6+8*n];
- TempB[7] = B[(7+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[7] * TempB[0];
- C[1+8*n+i] += TempA[7] * TempB[1];
- C[2+8*n+i] += TempA[7] * TempB[2];
- C[3+8*n+i] += TempA[7] * TempB[3];
- C[4+8*n+i] += TempA[7] * TempB[4];
- C[5+8*n+i] += TempA[7] * TempB[5];
- C[6+8*n+i] += TempA[7] * TempB[6];
- C[7+8*n+i] += TempA[7] * TempB[7];
- }
-
- }
- }
- }
- if(coreid == 0)
- {
- for ( j = 0; j < lda; j+=2 )
- {
-
- for ( m = 0; m < 4; m++ )
- {
-
- TempA[0] = A[j*lda+0+8*m];
- TempA[1] = A[j*lda+1+8*m];
- TempA[2] = A[j*lda+2+8*m];
- TempA[3] = A[j*lda+3+8*m];
- TempA[4] = A[j*lda+4+8*m];
- TempA[5] = A[j*lda+5+8*m];
- TempA[6] = A[j*lda+6+8*m];
- TempA[7] = A[j*lda+7+8*m];
-
- for( n = 0; n < 4; n++)
- {
- i = j*lda;
-
- TempB[0] = B[(0+8*m)*lda+0+8*n];
- TempB[1] = B[(0+8*m)*lda+1+8*n];
- TempB[2] = B[(0+8*m)*lda+2+8*n];
- TempB[3] = B[(0+8*m)*lda+3+8*n];
- TempB[4] = B[(0+8*m)*lda+4+8*n];
- TempB[5] = B[(0+8*m)*lda+5+8*n];
- TempB[6] = B[(0+8*m)*lda+6+8*n];
- TempB[7] = B[(0+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[0] * TempB[0];
- C[1+8*n+i] += TempA[0] * TempB[1];
- C[2+8*n+i] += TempA[0] * TempB[2];
- C[3+8*n+i] += TempA[0] * TempB[3];
- C[4+8*n+i] += TempA[0] * TempB[4];
- C[5+8*n+i] += TempA[0] * TempB[5];
- C[6+8*n+i] += TempA[0] * TempB[6];
- C[7+8*n+i] += TempA[0] * TempB[7];
-
-
-
- TempB[0] = B[(1+8*m)*lda+0+8*n];
- TempB[1] = B[(1+8*m)*lda+1+8*n];
- TempB[2] = B[(1+8*m)*lda+2+8*n];
- TempB[3] = B[(1+8*m)*lda+3+8*n];
- TempB[4] = B[(1+8*m)*lda+4+8*n];
- TempB[5] = B[(1+8*m)*lda+5+8*n];
- TempB[6] = B[(1+8*m)*lda+6+8*n];
- TempB[7] = B[(1+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[1] * TempB[0];
- C[1+8*n+i] += TempA[1] * TempB[1];
- C[2+8*n+i] += TempA[1] * TempB[2];
- C[3+8*n+i] += TempA[1] * TempB[3];
- C[4+8*n+i] += TempA[1] * TempB[4];
- C[5+8*n+i] += TempA[1] * TempB[5];
- C[6+8*n+i] += TempA[1] * TempB[6];
- C[7+8*n+i] += TempA[1] * TempB[7];
-
-
-
- TempB[0] = B[(2+8*m)*lda+0+8*n];
- TempB[1] = B[(2+8*m)*lda+1+8*n];
- TempB[2] = B[(2+8*m)*lda+2+8*n];
- TempB[3] = B[(2+8*m)*lda+3+8*n];
- TempB[4] = B[(2+8*m)*lda+4+8*n];
- TempB[5] = B[(2+8*m)*lda+5+8*n];
- TempB[6] = B[(2+8*m)*lda+6+8*n];
- TempB[7] = B[(2+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[2] * TempB[0];
- C[1+8*n+i] += TempA[2] * TempB[1];
- C[2+8*n+i] += TempA[2] * TempB[2];
- C[3+8*n+i] += TempA[2] * TempB[3];
- C[4+8*n+i] += TempA[2] * TempB[4];
- C[5+8*n+i] += TempA[2] * TempB[5];
- C[6+8*n+i] += TempA[2] * TempB[6];
- C[7+8*n+i] += TempA[2] * TempB[7];
-
-
-
- TempB[0] = B[(3+8*m)*lda+0+8*n];
- TempB[1] = B[(3+8*m)*lda+1+8*n];
- TempB[2] = B[(3+8*m)*lda+2+8*n];
- TempB[3] = B[(3+8*m)*lda+3+8*n];
- TempB[4] = B[(3+8*m)*lda+4+8*n];
- TempB[5] = B[(3+8*m)*lda+5+8*n];
- TempB[6] = B[(3+8*m)*lda+6+8*n];
- TempB[7] = B[(3+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[3] * TempB[0];
- C[1+8*n+i] += TempA[3] * TempB[1];
- C[2+8*n+i] += TempA[3] * TempB[2];
- C[3+8*n+i] += TempA[3] * TempB[3];
- C[4+8*n+i] += TempA[3] * TempB[4];
- C[5+8*n+i] += TempA[3] * TempB[5];
- C[6+8*n+i] += TempA[3] * TempB[6];
- C[7+8*n+i] += TempA[3] * TempB[7];
-
-
- TempB[0] = B[(4+8*m)*lda+0+8*n];
- TempB[1] = B[(4+8*m)*lda+1+8*n];
- TempB[2] = B[(4+8*m)*lda+2+8*n];
- TempB[3] = B[(4+8*m)*lda+3+8*n];
- TempB[4] = B[(4+8*m)*lda+4+8*n];
- TempB[5] = B[(4+8*m)*lda+5+8*n];
- TempB[6] = B[(4+8*m)*lda+6+8*n];
- TempB[7] = B[(4+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[4] * TempB[0];
- C[1+8*n+i] += TempA[4] * TempB[1];
- C[2+8*n+i] += TempA[4] * TempB[2];
- C[3+8*n+i] += TempA[4] * TempB[3];
- C[4+8*n+i] += TempA[4] * TempB[4];
- C[5+8*n+i] += TempA[4] * TempB[5];
- C[6+8*n+i] += TempA[4] * TempB[6];
- C[7+8*n+i] += TempA[4] * TempB[7];
-
-
-
- TempB[0] = B[(5+8*m)*lda+0+8*n];
- TempB[1] = B[(5+8*m)*lda+1+8*n];
- TempB[2] = B[(5+8*m)*lda+2+8*n];
- TempB[3] = B[(5+8*m)*lda+3+8*n];
- TempB[4] = B[(5+8*m)*lda+4+8*n];
- TempB[5] = B[(5+8*m)*lda+5+8*n];
- TempB[6] = B[(5+8*m)*lda+6+8*n];
- TempB[7] = B[(5+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[5] * TempB[0];
- C[1+8*n+i] += TempA[5] * TempB[1];
- C[2+8*n+i] += TempA[5] * TempB[2];
- C[3+8*n+i] += TempA[5] * TempB[3];
- C[4+8*n+i] += TempA[5] * TempB[4];
- C[5+8*n+i] += TempA[5] * TempB[5];
- C[6+8*n+i] += TempA[5] * TempB[6];
- C[7+8*n+i] += TempA[5] * TempB[7];
-
-
-
- TempB[0] = B[(6+8*m)*lda+0+8*n];
- TempB[1] = B[(6+8*m)*lda+1+8*n];
- TempB[2] = B[(6+8*m)*lda+2+8*n];
- TempB[3] = B[(6+8*m)*lda+3+8*n];
- TempB[4] = B[(6+8*m)*lda+4+8*n];
- TempB[5] = B[(6+8*m)*lda+5+8*n];
- TempB[6] = B[(6+8*m)*lda+6+8*n];
- TempB[7] = B[(6+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[6] * TempB[0];
- C[1+8*n+i] += TempA[6] * TempB[1];
- C[2+8*n+i] += TempA[6] * TempB[2];
- C[3+8*n+i] += TempA[6] * TempB[3];
- C[4+8*n+i] += TempA[6] * TempB[4];
- C[5+8*n+i] += TempA[6] * TempB[5];
- C[6+8*n+i] += TempA[6] * TempB[6];
- C[7+8*n+i] += TempA[6] * TempB[7];
-
-
- TempB[0] = B[(7+8*m)*lda+0+8*n];
- TempB[1] = B[(7+8*m)*lda+1+8*n];
- TempB[2] = B[(7+8*m)*lda+2+8*n];
- TempB[3] = B[(7+8*m)*lda+3+8*n];
- TempB[4] = B[(7+8*m)*lda+4+8*n];
- TempB[5] = B[(7+8*m)*lda+5+8*n];
- TempB[6] = B[(7+8*m)*lda+6+8*n];
- TempB[7] = B[(7+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[7] * TempB[0];
- C[1+8*n+i] += TempA[7] * TempB[1];
- C[2+8*n+i] += TempA[7] * TempB[2];
- C[3+8*n+i] += TempA[7] * TempB[3];
- C[4+8*n+i] += TempA[7] * TempB[4];
- C[5+8*n+i] += TempA[7] * TempB[5];
- C[6+8*n+i] += TempA[7] * TempB[6];
- C[7+8*n+i] += TempA[7] * TempB[7];
- }
-
- }
- }
- }
-
- */
- //-------------------------------------------------------------version2.2, optimize k. from 4 instead of 8 like v2.1, random failing on MI, unknown reason, MSI,350K, take off each inner loop for core 0 260k, both cores 134k
- //-------------------------------------------------------------try false sharing for core 0, 136k.
- /*
- static __thread int j, m, n;
- static __thread data_t TempA[4];
- static __thread data_t TempB[4];
-
- if(coreid == 1)
- {
- for ( j = 1; j < lda; j+=2 )
- {
- for ( m = 0; m < 8; m++ )
- {
- TempA[0] = A[j*lda+0+4*m];
- TempA[1] = A[j*lda+1+4*m];
- TempA[2] = A[j*lda+2+4*m];
- TempA[3] = A[j*lda+3+4*m];
-
- for( n = 0; n < 8; n++)
- {
-
- TempB[0] = B[(0+4*m)*lda+0+4*n];
- TempB[1] = B[(0+4*m)*lda+1+4*n];
- TempB[2] = B[(0+4*m)*lda+2+4*n];
- TempB[3] = B[(0+4*m)*lda+3+4*n];
-
-
- C[0+4*n+j*lda] += TempA[0] * TempB[0];
- C[1+4*n+j*lda] += TempA[0] * TempB[1];
- C[2+4*n+j*lda] += TempA[0] * TempB[2];
- C[3+4*n+j*lda] += TempA[0] * TempB[3];
-
-
-
-
-
- TempB[0] = B[(1+4*m)*lda+0+4*n];
- TempB[1] = B[(1+4*m)*lda+1+4*n];
- TempB[2] = B[(1+4*m)*lda+2+4*n];
- TempB[3] = B[(1+4*m)*lda+3+4*n];
-
-
- C[0+4*n+j*lda] += TempA[1] * TempB[0];
- C[1+4*n+j*lda] += TempA[1] * TempB[1];
- C[2+4*n+j*lda] += TempA[1] * TempB[2];
- C[3+4*n+j*lda] += TempA[1] * TempB[3];
-
-
-
- TempB[0] = B[(2+4*m)*lda+0+4*n];
- TempB[1] = B[(2+4*m)*lda+1+4*n];
- TempB[2] = B[(2+4*m)*lda+2+4*n];
- TempB[3] = B[(2+4*m)*lda+3+4*n];
-
-
- C[0+4*n+j*lda] += TempA[2] * TempB[0];
- C[1+4*n+j*lda] += TempA[2] * TempB[1];
- C[2+4*n+j*lda] += TempA[2] * TempB[2];
- C[3+4*n+j*lda] += TempA[2] * TempB[3];
-
-
-
-
- TempB[0] = B[(3+4*m)*lda+0+4*n];
- TempB[1] = B[(3+4*m)*lda+1+4*n];
- TempB[2] = B[(3+4*m)*lda+2+4*n];
- TempB[3] = B[(3+4*m)*lda+3+4*n];
-
-
- C[0+4*n+j*lda] += TempA[3] * TempB[0];
- C[1+4*n+j*lda] += TempA[3] * TempB[1];
- C[2+4*n+j*lda] += TempA[3] * TempB[2];
- C[3+4*n+j*lda] += TempA[3] * TempB[3];
-
-
- }
- }
- }
- }
- if(coreid == 0)
- {
- for ( j = 0; j < lda; j+=2 )
- {
- for ( m = 0; m < 8; m++ )
- {
- TempA[0] = A[j*lda+0+4*m];
- TempA[1] = A[j*lda+1+4*m];
- TempA[2] = A[j*lda+2+4*m];
- TempA[3] = A[j*lda+3+4*m];
-
- for( n = 0; n < 8; n++)
- {
-
-
-
-
-
-
-
- TempB[0] = B[(1+4*m)*lda+0+4*n];
- TempB[1] = B[(1+4*m)*lda+1+4*n];
- TempB[2] = B[(1+4*m)*lda+2+4*n];
- TempB[3] = B[(1+4*m)*lda+3+4*n];
-
-
- C[0+4*n+j*lda] += TempA[1] * TempB[0];
- C[1+4*n+j*lda] += TempA[1] * TempB[1];
- C[2+4*n+j*lda] += TempA[1] * TempB[2];
- C[3+4*n+j*lda] += TempA[1] * TempB[3];
-
-
-
- TempB[0] = B[(2+4*m)*lda+0+4*n];
- TempB[1] = B[(2+4*m)*lda+1+4*n];
- TempB[2] = B[(2+4*m)*lda+2+4*n];
- TempB[3] = B[(2+4*m)*lda+3+4*n];
-
-
- C[0+4*n+j*lda] += TempA[2] * TempB[0];
- C[1+4*n+j*lda] += TempA[2] * TempB[1];
- C[2+4*n+j*lda] += TempA[2] * TempB[2];
- C[3+4*n+j*lda] += TempA[2] * TempB[3];
-
-
-
-
- TempB[0] = B[(3+4*m)*lda+0+4*n];
- TempB[1] = B[(3+4*m)*lda+1+4*n];
- TempB[2] = B[(3+4*m)*lda+2+4*n];
- TempB[3] = B[(3+4*m)*lda+3+4*n];
-
-
- C[0+4*n+j*lda] += TempA[3] * TempB[0];
- C[1+4*n+j*lda] += TempA[3] * TempB[1];
- C[2+4*n+j*lda] += TempA[3] * TempB[2];
- C[3+4*n+j*lda] += TempA[3] * TempB[3];
-
- TempB[0] = B[(0+4*m)*lda+0+4*n];
- TempB[1] = B[(0+4*m)*lda+1+4*n];
- TempB[2] = B[(0+4*m)*lda+2+4*n];
- TempB[3] = B[(0+4*m)*lda+3+4*n];
-
-
- C[0+4*n+j*lda] += TempA[0] * TempB[0];
- C[1+4*n+j*lda] += TempA[0] * TempB[1];
- C[2+4*n+j*lda] += TempA[0] * TempB[2];
- C[3+4*n+j*lda] += TempA[0] * TempB[3];
-
-
- }
- }
- }
- }
- */
-
-
-
- //-------------------------------------------------------------version2.3, read 8 elements in B at one time. make k to 2. 150k mi 128k msi. worse than v2.0
- /*
- static __thread int i, j, k, m, n;
- static __thread data_t TempA[2];
- static __thread data_t TempB[8];
-
- if(coreid == 0)
- {
- for ( j = 0; j < lda; j+=2 )
- {
- for ( m = 0; m < 16; m++ )
- {
- TempA[0] = A[j*lda + 0 + 2*m];
- TempA[1] = A[j*lda + 1 + 2*m];
- for( n = 0; n < 4; n++)
- {
-
- TempB[0] = B[2*m*lda+0+8*n];
- TempB[1] = B[2*m*lda+1+8*n];
- TempB[2] = B[2*m*lda+2+8*n];
- TempB[3] = B[2*m*lda+3+8*n];
- TempB[4] = B[2*m*lda+4+8*n];
- TempB[5] = B[2*m*lda+5+8*n];
- TempB[6] = B[2*m*lda+6+8*n];
- TempB[7] = B[2*m*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[0] * TempB[0];
- C[1+8*n+j*lda] += TempA[0] * TempB[1];
- C[2+8*n+j*lda] += TempA[0] * TempB[2];
- C[3+8*n+j*lda] += TempA[0] * TempB[3];
- C[4+8*n+j*lda] += TempA[0] * TempB[4];
- C[5+8*n+j*lda] += TempA[0] * TempB[5];
- C[6+8*n+j*lda] += TempA[0] * TempB[6];
- C[7+8*n+j*lda] += TempA[0] * TempB[7];
-
- TempB[0] = B[(1+2*m)*lda+0+8*n];
- TempB[1] = B[(1+2*m)*lda+1+8*n];
- TempB[2] = B[(1+2*m)*lda+2+8*n];
- TempB[3] = B[(1+2*m)*lda+3+8*n];
- TempB[4] = B[(1+2*m)*lda+4+8*n];
- TempB[5] = B[(1+2*m)*lda+5+8*n];
- TempB[6] = B[(1+2*m)*lda+6+8*n];
- TempB[7] = B[(1+2*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[1] * TempB[0];
- C[1+8*n+j*lda] += TempA[1] * TempB[1];
- C[2+8*n+j*lda] += TempA[1] * TempB[2];
- C[3+8*n+j*lda] += TempA[1] * TempB[3];
- C[4+8*n+j*lda] += TempA[1] * TempB[4];
- C[5+8*n+j*lda] += TempA[1] * TempB[5];
- C[6+8*n+j*lda] += TempA[1] * TempB[6];
- C[7+8*n+j*lda] += TempA[1] * TempB[7];
-
- }
-
- }
- }
- }
-
- if(coreid == 1)
- {
- for ( j = 1; j < lda; j+=2 )
- {
- for ( m = 0; m < 16; m++ )
- {
- TempA[0] = A[j*lda + 0 + 2*m];
- TempA[1] = A[j*lda + 1 + 2*m];
- for( n = 0; n < 4; n++)
- {
-
- TempB[0] = B[2*m*lda+0+8*n];
- TempB[1] = B[2*m*lda+1+8*n];
- TempB[2] = B[2*m*lda+2+8*n];
- TempB[3] = B[2*m*lda+3+8*n];
- TempB[4] = B[2*m*lda+4+8*n];
- TempB[5] = B[2*m*lda+5+8*n];
- TempB[6] = B[2*m*lda+6+8*n];
- TempB[7] = B[2*m*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[0] * TempB[0];
- C[1+8*n+j*lda] += TempA[0] * TempB[1];
- C[2+8*n+j*lda] += TempA[0] * TempB[2];
- C[3+8*n+j*lda] += TempA[0] * TempB[3];
- C[4+8*n+j*lda] += TempA[0] * TempB[4];
- C[5+8*n+j*lda] += TempA[0] * TempB[5];
- C[6+8*n+j*lda] += TempA[0] * TempB[6];
- C[7+8*n+j*lda] += TempA[0] * TempB[7];
-
- TempB[0] = B[(1+2*m)*lda+0+8*n];
- TempB[1] = B[(1+2*m)*lda+1+8*n];
- TempB[2] = B[(1+2*m)*lda+2+8*n];
- TempB[3] = B[(1+2*m)*lda+3+8*n];
- TempB[4] = B[(1+2*m)*lda+4+8*n];
- TempB[5] = B[(1+2*m)*lda+5+8*n];
- TempB[6] = B[(1+2*m)*lda+6+8*n];
- TempB[7] = B[(1+2*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[1] * TempB[0];
- C[1+8*n+j*lda] += TempA[1] * TempB[1];
- C[2+8*n+j*lda] += TempA[1] * TempB[2];
- C[3+8*n+j*lda] += TempA[1] * TempB[3];
- C[4+8*n+j*lda] += TempA[1] * TempB[4];
- C[5+8*n+j*lda] += TempA[1] * TempB[5];
- C[6+8*n+j*lda] += TempA[1] * TempB[6];
- C[7+8*n+j*lda] += TempA[1] * TempB[7];
-
- }
-
- }
- }
- }
- */
- //-------------------------------------------------------------version2.4, read 4 170k and 16 140k, error because not enough space elements in B at one time.
- /*
- static __thread int i, j, k, m, n;
- static __thread data_t TempA;
- static __thread data_t TempB[16];
-
- if(coreid == 0)
- {
- for ( j = 0; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[j*lda + k];
- for( n = 0; n < 2; n++)
- {
-
- TempB[0] = B[k*lda+0+16*n];
- TempB[1] = B[k*lda+1+16*n];
- TempB[2] = B[k*lda+2+16*n];
- TempB[3] = B[k*lda+3+16*n];
- TempB[4] = B[k*lda+4+16*n];
- TempB[5] = B[k*lda+5+16*n];
- TempB[6] = B[k*lda+6+16*n];
- TempB[7] = B[k*lda+7+16*n];
- TempB[8] = B[k*lda+8+16*n];
- TempB[9] = B[k*lda+9+16*n];
- TempB[10] = B[k*lda+10+16*n];
- TempB[11] = B[k*lda+11+16*n];
- TempB[12] = B[k*lda+12+16*n];
- TempB[13] = B[k*lda+13+16*n];
- TempB[14] = B[k*lda+14+16*n];
- TempB[15] = B[k*lda+15+16*n];
-
-
- C[0+16*n+j*lda] += TempA * TempB[0];
- C[1+16*n+j*lda] += TempA * TempB[1];
- C[2+16*n+j*lda] += TempA * TempB[2];
- C[3+16*n+j*lda] += TempA * TempB[3];
- C[4+16*n+j*lda] += TempA * TempB[4];
- C[5+16*n+j*lda] += TempA * TempB[5];
- C[6+16*n+j*lda] += TempA * TempB[6];
- C[7+16*n+j*lda] += TempA * TempB[7];
- C[8+16*n+j*lda] += TempA * TempB[8];
- C[9+16*n+j*lda] += TempA * TempB[9];
- C[10+16*n+j*lda] += TempA * TempB[10];
- C[11+16*n+j*lda] += TempA * TempB[11];
- C[12+16*n+j*lda] += TempA * TempB[12];
- C[13+16*n+j*lda] += TempA * TempB[13];
- C[14+16*n+j*lda] += TempA * TempB[14];
- C[15+16*n+j*lda] += TempA * TempB[15];
-
-
-
- }
-
- }
- }
- }
- if(coreid == 1)
- {
- for ( j = 1; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[j*lda + k];
- for( n = 0; n < 2; n++)
- {
-
- TempB[0] = B[k*lda+0+16*n];
- TempB[1] = B[k*lda+1+16*n];
- TempB[2] = B[k*lda+2+16*n];
- TempB[3] = B[k*lda+3+16*n];
- TempB[4] = B[k*lda+4+16*n];
- TempB[5] = B[k*lda+5+16*n];
- TempB[6] = B[k*lda+6+16*n];
- TempB[7] = B[k*lda+7+16*n];
- TempB[8] = B[k*lda+8+16*n];
- TempB[9] = B[k*lda+9+16*n];
- TempB[10] = B[k*lda+10+16*n];
- TempB[11] = B[k*lda+11+16*n];
- TempB[12] = B[k*lda+12+16*n];
- TempB[13] = B[k*lda+13+16*n];
- TempB[14] = B[k*lda+14+16*n];
- TempB[15] = B[k*lda+15+16*n];
-
-
- C[0+16*n+j*lda] += TempA * TempB[0];
- C[1+16*n+j*lda] += TempA * TempB[1];
- C[2+16*n+j*lda] += TempA * TempB[2];
- C[3+16*n+j*lda] += TempA * TempB[3];
- C[4+16*n+j*lda] += TempA * TempB[4];
- C[5+16*n+j*lda] += TempA * TempB[5];
- C[6+16*n+j*lda] += TempA * TempB[6];
- C[7+16*n+j*lda] += TempA * TempB[7];
- C[8+16*n+j*lda] += TempA * TempB[8];
- C[9+16*n+j*lda] += TempA * TempB[9];
- C[10+16*n+j*lda] += TempA * TempB[10];
- C[11+16*n+j*lda] += TempA * TempB[11];
- C[12+16*n+j*lda] += TempA * TempB[12];
- C[13+16*n+j*lda] += TempA * TempB[13];
- C[14+16*n+j*lda] += TempA * TempB[14];
- C[15+16*n+j*lda] += TempA * TempB[15];
-
-
-
- }
-
- }
- }
- }
-
- */
- //-------------------------------------------------------------version2.5, read 10 elements in B at one time. has corner cases. Turns out it hangs.
- /*
- static __thread int j, k, n;
- static __thread data_t TempA;
- static __thread data_t TempB[10];
-
- if(coreid == 0)
- {
- for ( j = 0; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[j*lda + k];
- for( n = 0; n < 3; n++)
- {
- TempB[0] = B[k*lda+0+10*n];
- TempB[1] = B[k*lda+1+10*n];
- TempB[2] = B[k*lda+2+10*n];
- TempB[3] = B[k*lda+3+10*n];
- TempB[4] = B[k*lda+4+10*n];
- TempB[5] = B[k*lda+5+10*n];
- TempB[6] = B[k*lda+6+10*n];
- TempB[7] = B[k*lda+7+10*n];
- TempB[8] = B[k*lda+8+10*n];
- TempB[9] = B[k*lda+9+10*n];
-
- C[0+10*n+j*lda] += TempA * TempB[0];
- C[1+10*n+j*lda] += TempA * TempB[1];
- C[2+10*n+j*lda] += TempA * TempB[2];
- C[3+10*n+j*lda] += TempA * TempB[3];
- C[4+10*n+j*lda] += TempA * TempB[4];
- C[5+10*n+j*lda] += TempA * TempB[5];
- C[6+10*n+j*lda] += TempA * TempB[6];
- C[7+10*n+j*lda] += TempA * TempB[7];
- C[8+10*n+j*lda] += TempA * TempB[8];
- C[9+10*n+j*lda] += TempA * TempB[9];
- }
- TempB[0] = B[k*lda+30];
- TempB[1] = B[k*lda+31];
- C[30+j*lda] += TempA * TempB[0];
- C[31+j*lda] += TempA * TempB[1];
- }
- }
- }
- if(coreid == 1)
- {
- for ( j = 1; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[j*lda + k];
- for( n = 0; n < 3; n++)
- {
- TempB[0] = B[k*lda+0+10*n];
- TempB[1] = B[k*lda+1+10*n];
- TempB[2] = B[k*lda+2+10*n];
- TempB[3] = B[k*lda+3+10*n];
- TempB[4] = B[k*lda+4+10*n];
- TempB[5] = B[k*lda+5+10*n];
- TempB[6] = B[k*lda+6+10*n];
- TempB[7] = B[k*lda+7+10*n];
- TempB[8] = B[k*lda+8+10*n];
- TempB[9] = B[k*lda+9+10*n];
-
- C[0+10*n+j*lda] += TempA * TempB[0];
- C[1+10*n+j*lda] += TempA * TempB[1];
- C[2+10*n+j*lda] += TempA * TempB[2];
- C[3+10*n+j*lda] += TempA * TempB[3];
- C[4+10*n+j*lda] += TempA * TempB[4];
- C[5+10*n+j*lda] += TempA * TempB[5];
- C[6+10*n+j*lda] += TempA * TempB[6];
- C[7+10*n+j*lda] += TempA * TempB[7];
- C[8+10*n+j*lda] += TempA * TempB[8];
- C[9+10*n+j*lda] += TempA * TempB[9];
- }
- TempB[0] = B[k*lda+30];
- TempB[1] = B[k*lda+31];
- C[30+j*lda] += TempA * TempB[0];
- C[31+j*lda] += TempA * TempB[1];
- }
- }
- }
-
- */
-
- //-------------------------------------------------------------version2.6, optimize 2.0. take off n loop and tried different order of reading B
- /*
- static __thread int j, k, n;
- static __thread data_t TempA;
- static __thread data_t TempB[8];
-
- if(coreid == 0)
- {
- for ( j = 0; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[j*lda + k];
-
- TempB[0] = B[k*lda+0];
- TempB[1] = B[k*lda+1];
- TempB[2] = B[k*lda+2];
- TempB[3] = B[k*lda+3];
- TempB[4] = B[k*lda+4];
- TempB[5] = B[k*lda+5];
- TempB[6] = B[k*lda+6];
- TempB[7] = B[k*lda+7];
-
- C[0+j*lda] += TempA * TempB[0];
- C[1+j*lda] += TempA * TempB[1];
- C[2+j*lda] += TempA * TempB[2];
- C[3+j*lda] += TempA * TempB[3];
- C[4+j*lda] += TempA * TempB[4];
- C[5+j*lda] += TempA * TempB[5];
- C[6+j*lda] += TempA * TempB[6];
- C[7+j*lda] += TempA * TempB[7];
-
- TempB[0] = B[k*lda+8];
- TempB[1] = B[k*lda+9];
- TempB[2] = B[k*lda+10];
- TempB[3] = B[k*lda+11];
- TempB[4] = B[k*lda+12];
- TempB[5] = B[k*lda+13];
- TempB[6] = B[k*lda+14];
- TempB[7] = B[k*lda+15];
-
- C[8+j*lda] += TempA * TempB[0];
- C[9+j*lda] += TempA * TempB[1];
- C[10+j*lda] += TempA * TempB[2];
- C[11+j*lda] += TempA * TempB[3];
- C[12+j*lda] += TempA * TempB[4];
- C[13+j*lda] += TempA * TempB[5];
- C[14+j*lda] += TempA * TempB[6];
- C[15+j*lda] += TempA * TempB[7];
-
- TempB[0] = B[k*lda+16];
- TempB[1] = B[k*lda+17];
- TempB[2] = B[k*lda+18];
- TempB[3] = B[k*lda+19];
- TempB[4] = B[k*lda+20];
- TempB[5] = B[k*lda+21];
- TempB[6] = B[k*lda+22];
- TempB[7] = B[k*lda+23];
-
- C[16+j*lda] += TempA * TempB[0];
- C[17+j*lda] += TempA * TempB[1];
- C[18+j*lda] += TempA * TempB[2];
- C[19+j*lda] += TempA * TempB[3];
- C[20+j*lda] += TempA * TempB[4];
- C[21+j*lda] += TempA * TempB[5];
- C[22+j*lda] += TempA * TempB[6];
- C[23+j*lda] += TempA * TempB[7];
-
- TempB[0] = B[k*lda+24];
- TempB[1] = B[k*lda+25];
- TempB[2] = B[k*lda+26];
- TempB[3] = B[k*lda+27];
- TempB[4] = B[k*lda+28];
- TempB[5] = B[k*lda+29];
- TempB[6] = B[k*lda+30];
- TempB[7] = B[k*lda+31];
-
- C[24+j*lda] += TempA * TempB[0];
- C[25+j*lda] += TempA * TempB[1];
- C[26+j*lda] += TempA * TempB[2];
- C[27+j*lda] += TempA * TempB[3];
- C[28+j*lda] += TempA * TempB[4];
- C[29+j*lda] += TempA * TempB[5];
- C[30+j*lda] += TempA * TempB[6];
- C[31+j*lda] += TempA * TempB[7];
-
-
-
- }
- }
- }
-
- if(coreid == 1)
- {
- for ( j = 1; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[j*lda + k];
-
-
- TempB[0] = B[k*lda+24];
- TempB[1] = B[k*lda+25];
- TempB[2] = B[k*lda+26];
- TempB[3] = B[k*lda+27];
- TempB[4] = B[k*lda+28];
- TempB[5] = B[k*lda+29];
- TempB[6] = B[k*lda+30];
- TempB[7] = B[k*lda+31];
-
- C[24+j*lda] += TempA * TempB[0];
- C[25+j*lda] += TempA * TempB[1];
- C[26+j*lda] += TempA * TempB[2];
- C[27+j*lda] += TempA * TempB[3];
- C[28+j*lda] += TempA * TempB[4];
- C[29+j*lda] += TempA * TempB[5];
- C[30+j*lda] += TempA * TempB[6];
- C[31+j*lda] += TempA * TempB[7];
-
- TempB[0] = B[k*lda+0];
- TempB[1] = B[k*lda+1];
- TempB[2] = B[k*lda+2];
- TempB[3] = B[k*lda+3];
- TempB[4] = B[k*lda+4];
- TempB[5] = B[k*lda+5];
- TempB[6] = B[k*lda+6];
- TempB[7] = B[k*lda+7];
-
- C[0+j*lda] += TempA * TempB[0];
- C[1+j*lda] += TempA * TempB[1];
- C[2+j*lda] += TempA * TempB[2];
- C[3+j*lda] += TempA * TempB[3];
- C[4+j*lda] += TempA * TempB[4];
- C[5+j*lda] += TempA * TempB[5];
- C[6+j*lda] += TempA * TempB[6];
- C[7+j*lda] += TempA * TempB[7];
-
- TempB[0] = B[k*lda+8];
- TempB[1] = B[k*lda+9];
- TempB[2] = B[k*lda+10];
- TempB[3] = B[k*lda+11];
- TempB[4] = B[k*lda+12];
- TempB[5] = B[k*lda+13];
- TempB[6] = B[k*lda+14];
- TempB[7] = B[k*lda+15];
-
- C[8+j*lda] += TempA * TempB[0];
- C[9+j*lda] += TempA * TempB[1];
- C[10+j*lda] += TempA * TempB[2];
- C[11+j*lda] += TempA * TempB[3];
- C[12+j*lda] += TempA * TempB[4];
- C[13+j*lda] += TempA * TempB[5];
- C[14+j*lda] += TempA * TempB[6];
- C[15+j*lda] += TempA * TempB[7];
-
- TempB[0] = B[k*lda+16];
- TempB[1] = B[k*lda+17];
- TempB[2] = B[k*lda+18];
- TempB[3] = B[k*lda+19];
- TempB[4] = B[k*lda+20];
- TempB[5] = B[k*lda+21];
- TempB[6] = B[k*lda+22];
- TempB[7] = B[k*lda+23];
-
- C[16+j*lda] += TempA * TempB[0];
- C[17+j*lda] += TempA * TempB[1];
- C[18+j*lda] += TempA * TempB[2];
- C[19+j*lda] += TempA * TempB[3];
- C[20+j*lda] += TempA * TempB[4];
- C[21+j*lda] += TempA * TempB[5];
- C[22+j*lda] += TempA * TempB[6];
- C[23+j*lda] += TempA * TempB[7];
-
-
-
-
-
-
- }
- }
- }
- */
- //-------------------------------------------------------------version2.7, use m=l*da, i=k*lda,out of stack, only i, MI 150k, only m, MSI 117.9k slower than v2.0
- /*
- static __thread int i, j, k, m, n;
- static __thread data_t TempA;
- static __thread data_t TempB[8];
-
- if(coreid == 0)
- {
- for ( j = 0; j < lda; j+=2 )
- {
- m = j * lda;
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[m+ k];
- for( n = 0; n < 4; n++)
- {
-
- TempB[0] = B[k *lda+0+8*n];
- TempB[1] = B[k *lda+1+8*n];
- TempB[2] = B[k *lda+2+8*n];
- TempB[3] = B[k *lda+3+8*n];
- TempB[4] = B[k *lda+4+8*n];
- TempB[5] = B[k *lda+5+8*n];
- TempB[6] = B[k *lda+6+8*n];
- TempB[7] = B[k *lda+7+8*n];
-
- C[0+8*n+m] += TempA * TempB[0];
- C[1+8*n+m] += TempA * TempB[1];
- C[2+8*n+m] += TempA * TempB[2];
- C[3+8*n+m] += TempA * TempB[3];
- C[4+8*n+m] += TempA * TempB[4];
- C[5+8*n+m] += TempA * TempB[5];
- C[6+8*n+m] += TempA * TempB[6];
- C[7+8*n+m] += TempA * TempB[7];
-
- }
-
- }
- }
- }
-if(coreid == 1)
- {
- for ( j = 1; j < lda; j+=2 )
- {
- m = j * lda;
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[m+ k];
- for( n = 0; n < 4; n++)
- {
-
- TempB[0] = B[k *lda+0+8*n];
- TempB[1] = B[k *lda+1+8*n];
- TempB[2] = B[k *lda+2+8*n];
- TempB[3] = B[k *lda+3+8*n];
- TempB[4] = B[k *lda+4+8*n];
- TempB[5] = B[k *lda+5+8*n];
- TempB[6] = B[k *lda+6+8*n];
- TempB[7] = B[k *lda+7+8*n];
-
- C[0+8*n+m] += TempA * TempB[0];
- C[1+8*n+m] += TempA * TempB[1];
- C[2+8*n+m] += TempA * TempB[2];
- C[3+8*n+m] += TempA * TempB[3];
- C[4+8*n+m] += TempA * TempB[4];
- C[5+8*n+m] += TempA * TempB[5];
- C[6+8*n+m] += TempA * TempB[6];
- C[7+8*n+m] += TempA * TempB[7];
-
- }
-
- }
- }
- }
- */
-//-------------------------------------------------------------version2.8 deal with false sharing, MSI,118K vs v2.0 117.0K. MI 147.629K.
-/*
-static __thread int i, j, k, m, n;
- static __thread data_t TempA;
- static __thread data_t TempB[8];
-
- if(coreid == 0)
- {
- for ( j = 0; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[j*lda + k];
- for( n = 0; n < 2; n++)
- {
-
- TempB[0] = B[k*lda+0+16*n];
- TempB[1] = B[k*lda+1+16*n];
- TempB[2] = B[k*lda+2+16*n];
- TempB[3] = B[k*lda+3+16*n];
- TempB[4] = B[k*lda+4+16*n];
- TempB[5] = B[k*lda+5+16*n];
- TempB[6] = B[k*lda+6+16*n];
- TempB[7] = B[k*lda+7+16*n];
-
-
-
- C[0+16*n+j*lda] += TempA * TempB[0];
- C[1+16*n+j*lda] += TempA * TempB[1];
- C[2+16*n+j*lda] += TempA * TempB[2];
- C[3+16*n+j*lda] += TempA * TempB[3];
- C[4+16*n+j*lda] += TempA * TempB[4];
- C[5+16*n+j*lda] += TempA * TempB[5];
- C[6+16*n+j*lda] += TempA * TempB[6];
- C[7+16*n+j*lda] += TempA * TempB[7];
-
- TempB[0] = B[k*lda+8+16*n];
- TempB[1] = B[k*lda+9+16*n];
- TempB[2] = B[k*lda+10+16*n];
- TempB[3] = B[k*lda+11+16*n];
- TempB[4] = B[k*lda+12+16*n];
- TempB[5] = B[k*lda+13+16*n];
- TempB[6] = B[k*lda+14+16*n];
- TempB[7] = B[k*lda+15+16*n];
-
- C[8+16*n+j*lda] += TempA * TempB[0];
- C[9+16*n+j*lda] += TempA * TempB[1];
- C[10+16*n+j*lda] += TempA * TempB[2];
- C[11+16*n+j*lda] += TempA * TempB[3];
- C[12+16*n+j*lda] += TempA * TempB[4];
- C[13+16*n+j*lda] += TempA * TempB[5];
- C[14+16*n+j*lda] += TempA * TempB[6];
- C[15+16*n+j*lda] += TempA * TempB[7];
-
-
-
- }
-
- }
- }
- }
- if(coreid == 1)
- {
- for ( j = 1; j < lda; j+=2 )
- {
- for ( k = 0; k < lda; k++ )
- {
- TempA = A[j*lda + k];
- for( n = 0; n < 2; n++)
- {
-
-
-
- TempB[0] = B[k*lda+8+16*n];
- TempB[1] = B[k*lda+9+16*n];
- TempB[2] = B[k*lda+10+16*n];
- TempB[3] = B[k*lda+11+16*n];
- TempB[4] = B[k*lda+12+16*n];
- TempB[5] = B[k*lda+13+16*n];
- TempB[6] = B[k*lda+14+16*n];
- TempB[7] = B[k*lda+15+16*n];
-
- C[8+16*n+j*lda] += TempA * TempB[0];
- C[9+16*n+j*lda] += TempA * TempB[1];
- C[10+16*n+j*lda] += TempA * TempB[2];
- C[11+16*n+j*lda] += TempA * TempB[3];
- C[12+16*n+j*lda] += TempA * TempB[4];
- C[13+16*n+j*lda] += TempA * TempB[5];
- C[14+16*n+j*lda] += TempA * TempB[6];
- C[15+16*n+j*lda] += TempA * TempB[7];
-
- TempB[0] = B[k*lda+0+16*n];
- TempB[1] = B[k*lda+1+16*n];
- TempB[2] = B[k*lda+2+16*n];
- TempB[3] = B[k*lda+3+16*n];
- TempB[4] = B[k*lda+4+16*n];
- TempB[5] = B[k*lda+5+16*n];
- TempB[6] = B[k*lda+6+16*n];
- TempB[7] = B[k*lda+7+16*n];
-
-
-
- C[0+16*n+j*lda] += TempA * TempB[0];
- C[1+16*n+j*lda] += TempA * TempB[1];
- C[2+16*n+j*lda] += TempA * TempB[2];
- C[3+16*n+j*lda] += TempA * TempB[3];
- C[4+16*n+j*lda] += TempA * TempB[4];
- C[5+16*n+j*lda] += TempA * TempB[5];
- C[6+16*n+j*lda] += TempA * TempB[6];
- C[7+16*n+j*lda] += TempA * TempB[7];
-
-
- }
-
- }
- }
- }
- */
-
- //----------------------------------------------------------------version 2.11 optmize j,use core 1 j from 0 to 15 MSI 98k i = j*lda
- //----------------------------------------------------------------version 2.12 not use i = j *lda MSI 95k
- static __thread data_t TempA[8];
- static __thread data_t TempB[8];
- static __thread int j,m,n,i,k;
-
- if(coreid == 1)
- {
- for ( j = 16; j < 32; j++ )
- {
-
- for ( m = 0; m < 4; m++ )
- {
-
- TempA[0] = A[j*lda+0+8*m];
- TempA[1] = A[j*lda+1+8*m];
- TempA[2] = A[j*lda+2+8*m];
- TempA[3] = A[j*lda+3+8*m];
- TempA[4] = A[j*lda+4+8*m];
- TempA[5] = A[j*lda+5+8*m];
- TempA[6] = A[j*lda+6+8*m];
- TempA[7] = A[j*lda+7+8*m];
-
- for( n = 0; n < 4; n++)
- {
- /*
- i = j*lda;
-
- TempB[0] = B[(0+8*m)*lda+0+8*n];
- TempB[1] = B[(0+8*m)*lda+1+8*n];
- TempB[2] = B[(0+8*m)*lda+2+8*n];
- TempB[3] = B[(0+8*m)*lda+3+8*n];
- TempB[4] = B[(0+8*m)*lda+4+8*n];
- TempB[5] = B[(0+8*m)*lda+5+8*n];
- TempB[6] = B[(0+8*m)*lda+6+8*n];
- TempB[7] = B[(0+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[0] * TempB[0];
- C[1+8*n+i] += TempA[0] * TempB[1];
- C[2+8*n+i] += TempA[0] * TempB[2];
- C[3+8*n+i] += TempA[0] * TempB[3];
- C[4+8*n+i] += TempA[0] * TempB[4];
- C[5+8*n+i] += TempA[0] * TempB[5];
- C[6+8*n+i] += TempA[0] * TempB[6];
- C[7+8*n+i] += TempA[0] * TempB[7];
-
-
-
- TempB[0] = B[(1+8*m)*lda+0+8*n];
- TempB[1] = B[(1+8*m)*lda+1+8*n];
- TempB[2] = B[(1+8*m)*lda+2+8*n];
- TempB[3] = B[(1+8*m)*lda+3+8*n];
- TempB[4] = B[(1+8*m)*lda+4+8*n];
- TempB[5] = B[(1+8*m)*lda+5+8*n];
- TempB[6] = B[(1+8*m)*lda+6+8*n];
- TempB[7] = B[(1+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[1] * TempB[0];
- C[1+8*n+i] += TempA[1] * TempB[1];
- C[2+8*n+i] += TempA[1] * TempB[2];
- C[3+8*n+i] += TempA[1] * TempB[3];
- C[4+8*n+i] += TempA[1] * TempB[4];
- C[5+8*n+i] += TempA[1] * TempB[5];
- C[6+8*n+i] += TempA[1] * TempB[6];
- C[7+8*n+i] += TempA[1] * TempB[7];
-
-
-
- TempB[0] = B[(2+8*m)*lda+0+8*n];
- TempB[1] = B[(2+8*m)*lda+1+8*n];
- TempB[2] = B[(2+8*m)*lda+2+8*n];
- TempB[3] = B[(2+8*m)*lda+3+8*n];
- TempB[4] = B[(2+8*m)*lda+4+8*n];
- TempB[5] = B[(2+8*m)*lda+5+8*n];
- TempB[6] = B[(2+8*m)*lda+6+8*n];
- TempB[7] = B[(2+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[2] * TempB[0];
- C[1+8*n+i] += TempA[2] * TempB[1];
- C[2+8*n+i] += TempA[2] * TempB[2];
- C[3+8*n+i] += TempA[2] * TempB[3];
- C[4+8*n+i] += TempA[2] * TempB[4];
- C[5+8*n+i] += TempA[2] * TempB[5];
- C[6+8*n+i] += TempA[2] * TempB[6];
- C[7+8*n+i] += TempA[2] * TempB[7];
-
-
-
- TempB[0] = B[(3+8*m)*lda+0+8*n];
- TempB[1] = B[(3+8*m)*lda+1+8*n];
- TempB[2] = B[(3+8*m)*lda+2+8*n];
- TempB[3] = B[(3+8*m)*lda+3+8*n];
- TempB[4] = B[(3+8*m)*lda+4+8*n];
- TempB[5] = B[(3+8*m)*lda+5+8*n];
- TempB[6] = B[(3+8*m)*lda+6+8*n];
- TempB[7] = B[(3+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[3] * TempB[0];
- C[1+8*n+i] += TempA[3] * TempB[1];
- C[2+8*n+i] += TempA[3] * TempB[2];
- C[3+8*n+i] += TempA[3] * TempB[3];
- C[4+8*n+i] += TempA[3] * TempB[4];
- C[5+8*n+i] += TempA[3] * TempB[5];
- C[6+8*n+i] += TempA[3] * TempB[6];
- C[7+8*n+i] += TempA[3] * TempB[7];
-
-
- TempB[0] = B[(4+8*m)*lda+0+8*n];
- TempB[1] = B[(4+8*m)*lda+1+8*n];
- TempB[2] = B[(4+8*m)*lda+2+8*n];
- TempB[3] = B[(4+8*m)*lda+3+8*n];
- TempB[4] = B[(4+8*m)*lda+4+8*n];
- TempB[5] = B[(4+8*m)*lda+5+8*n];
- TempB[6] = B[(4+8*m)*lda+6+8*n];
- TempB[7] = B[(4+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[4] * TempB[0];
- C[1+8*n+i] += TempA[4] * TempB[1];
- C[2+8*n+i] += TempA[4] * TempB[2];
- C[3+8*n+i] += TempA[4] * TempB[3];
- C[4+8*n+i] += TempA[4] * TempB[4];
- C[5+8*n+i] += TempA[4] * TempB[5];
- C[6+8*n+i] += TempA[4] * TempB[6];
- C[7+8*n+i] += TempA[4] * TempB[7];
-
-
-
- TempB[0] = B[(5+8*m)*lda+0+8*n];
- TempB[1] = B[(5+8*m)*lda+1+8*n];
- TempB[2] = B[(5+8*m)*lda+2+8*n];
- TempB[3] = B[(5+8*m)*lda+3+8*n];
- TempB[4] = B[(5+8*m)*lda+4+8*n];
- TempB[5] = B[(5+8*m)*lda+5+8*n];
- TempB[6] = B[(5+8*m)*lda+6+8*n];
- TempB[7] = B[(5+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[5] * TempB[0];
- C[1+8*n+i] += TempA[5] * TempB[1];
- C[2+8*n+i] += TempA[5] * TempB[2];
- C[3+8*n+i] += TempA[5] * TempB[3];
- C[4+8*n+i] += TempA[5] * TempB[4];
- C[5+8*n+i] += TempA[5] * TempB[5];
- C[6+8*n+i] += TempA[5] * TempB[6];
- C[7+8*n+i] += TempA[5] * TempB[7];
-
-
-
- TempB[0] = B[(6+8*m)*lda+0+8*n];
- TempB[1] = B[(6+8*m)*lda+1+8*n];
- TempB[2] = B[(6+8*m)*lda+2+8*n];
- TempB[3] = B[(6+8*m)*lda+3+8*n];
- TempB[4] = B[(6+8*m)*lda+4+8*n];
- TempB[5] = B[(6+8*m)*lda+5+8*n];
- TempB[6] = B[(6+8*m)*lda+6+8*n];
- TempB[7] = B[(6+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[6] * TempB[0];
- C[1+8*n+i] += TempA[6] * TempB[1];
- C[2+8*n+i] += TempA[6] * TempB[2];
- C[3+8*n+i] += TempA[6] * TempB[3];
- C[4+8*n+i] += TempA[6] * TempB[4];
- C[5+8*n+i] += TempA[6] * TempB[5];
- C[6+8*n+i] += TempA[6] * TempB[6];
- C[7+8*n+i] += TempA[6] * TempB[7];
-
-
- TempB[0] = B[(7+8*m)*lda+0+8*n];
- TempB[1] = B[(7+8*m)*lda+1+8*n];
- TempB[2] = B[(7+8*m)*lda+2+8*n];
- TempB[3] = B[(7+8*m)*lda+3+8*n];
- TempB[4] = B[(7+8*m)*lda+4+8*n];
- TempB[5] = B[(7+8*m)*lda+5+8*n];
- TempB[6] = B[(7+8*m)*lda+6+8*n];
- TempB[7] = B[(7+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[7] * TempB[0];
- C[1+8*n+i] += TempA[7] * TempB[1];
- C[2+8*n+i] += TempA[7] * TempB[2];
- C[3+8*n+i] += TempA[7] * TempB[3];
- C[4+8*n+i] += TempA[7] * TempB[4];
- C[5+8*n+i] += TempA[7] * TempB[5];
- C[6+8*n+i] += TempA[7] * TempB[6];
- C[7+8*n+i] += TempA[7] * TempB[7];
-
- */
- TempB[0] = B[(0+8*m)*lda+0+8*n];
- TempB[1] = B[(0+8*m)*lda+1+8*n];
- TempB[2] = B[(0+8*m)*lda+2+8*n];
- TempB[3] = B[(0+8*m)*lda+3+8*n];
- TempB[4] = B[(0+8*m)*lda+4+8*n];
- TempB[5] = B[(0+8*m)*lda+5+8*n];
- TempB[6] = B[(0+8*m)*lda+6+8*n];
- TempB[7] = B[(0+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[0] * TempB[0];
- C[1+8*n+j*lda] += TempA[0] * TempB[1];
- C[2+8*n+j*lda] += TempA[0] * TempB[2];
- C[3+8*n+j*lda] += TempA[0] * TempB[3];
- C[4+8*n+j*lda] += TempA[0] * TempB[4];
- C[5+8*n+j*lda] += TempA[0] * TempB[5];
- C[6+8*n+j*lda] += TempA[0] * TempB[6];
- C[7+8*n+j*lda] += TempA[0] * TempB[7];
-
-
-
- TempB[0] = B[(1+8*m)*lda+0+8*n];
- TempB[1] = B[(1+8*m)*lda+1+8*n];
- TempB[2] = B[(1+8*m)*lda+2+8*n];
- TempB[3] = B[(1+8*m)*lda+3+8*n];
- TempB[4] = B[(1+8*m)*lda+4+8*n];
- TempB[5] = B[(1+8*m)*lda+5+8*n];
- TempB[6] = B[(1+8*m)*lda+6+8*n];
- TempB[7] = B[(1+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[1] * TempB[0];
- C[1+8*n+j*lda] += TempA[1] * TempB[1];
- C[2+8*n+j*lda] += TempA[1] * TempB[2];
- C[3+8*n+j*lda] += TempA[1] * TempB[3];
- C[4+8*n+j*lda] += TempA[1] * TempB[4];
- C[5+8*n+j*lda] += TempA[1] * TempB[5];
- C[6+8*n+j*lda] += TempA[1] * TempB[6];
- C[7+8*n+j*lda] += TempA[1] * TempB[7];
-
-
-
- TempB[0] = B[(2+8*m)*lda+0+8*n];
- TempB[1] = B[(2+8*m)*lda+1+8*n];
- TempB[2] = B[(2+8*m)*lda+2+8*n];
- TempB[3] = B[(2+8*m)*lda+3+8*n];
- TempB[4] = B[(2+8*m)*lda+4+8*n];
- TempB[5] = B[(2+8*m)*lda+5+8*n];
- TempB[6] = B[(2+8*m)*lda+6+8*n];
- TempB[7] = B[(2+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[2] * TempB[0];
- C[1+8*n+j*lda] += TempA[2] * TempB[1];
- C[2+8*n+j*lda] += TempA[2] * TempB[2];
- C[3+8*n+j*lda] += TempA[2] * TempB[3];
- C[4+8*n+j*lda] += TempA[2] * TempB[4];
- C[5+8*n+j*lda] += TempA[2] * TempB[5];
- C[6+8*n+j*lda] += TempA[2] * TempB[6];
- C[7+8*n+j*lda] += TempA[2] * TempB[7];
-
-
-
- TempB[0] = B[(3+8*m)*lda+0+8*n];
- TempB[1] = B[(3+8*m)*lda+1+8*n];
- TempB[2] = B[(3+8*m)*lda+2+8*n];
- TempB[3] = B[(3+8*m)*lda+3+8*n];
- TempB[4] = B[(3+8*m)*lda+4+8*n];
- TempB[5] = B[(3+8*m)*lda+5+8*n];
- TempB[6] = B[(3+8*m)*lda+6+8*n];
- TempB[7] = B[(3+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[3] * TempB[0];
- C[1+8*n+j*lda] += TempA[3] * TempB[1];
- C[2+8*n+j*lda] += TempA[3] * TempB[2];
- C[3+8*n+j*lda] += TempA[3] * TempB[3];
- C[4+8*n+j*lda] += TempA[3] * TempB[4];
- C[5+8*n+j*lda] += TempA[3] * TempB[5];
- C[6+8*n+j*lda] += TempA[3] * TempB[6];
- C[7+8*n+j*lda] += TempA[3] * TempB[7];
-
-
- TempB[0] = B[(4+8*m)*lda+0+8*n];
- TempB[1] = B[(4+8*m)*lda+1+8*n];
- TempB[2] = B[(4+8*m)*lda+2+8*n];
- TempB[3] = B[(4+8*m)*lda+3+8*n];
- TempB[4] = B[(4+8*m)*lda+4+8*n];
- TempB[5] = B[(4+8*m)*lda+5+8*n];
- TempB[6] = B[(4+8*m)*lda+6+8*n];
- TempB[7] = B[(4+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[4] * TempB[0];
- C[1+8*n+j*lda] += TempA[4] * TempB[1];
- C[2+8*n+j*lda] += TempA[4] * TempB[2];
- C[3+8*n+j*lda] += TempA[4] * TempB[3];
- C[4+8*n+j*lda] += TempA[4] * TempB[4];
- C[5+8*n+j*lda] += TempA[4] * TempB[5];
- C[6+8*n+j*lda] += TempA[4] * TempB[6];
- C[7+8*n+j*lda] += TempA[4] * TempB[7];
-
-
-
- TempB[0] = B[(5+8*m)*lda+0+8*n];
- TempB[1] = B[(5+8*m)*lda+1+8*n];
- TempB[2] = B[(5+8*m)*lda+2+8*n];
- TempB[3] = B[(5+8*m)*lda+3+8*n];
- TempB[4] = B[(5+8*m)*lda+4+8*n];
- TempB[5] = B[(5+8*m)*lda+5+8*n];
- TempB[6] = B[(5+8*m)*lda+6+8*n];
- TempB[7] = B[(5+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[5] * TempB[0];
- C[1+8*n+j*lda] += TempA[5] * TempB[1];
- C[2+8*n+j*lda] += TempA[5] * TempB[2];
- C[3+8*n+j*lda] += TempA[5] * TempB[3];
- C[4+8*n+j*lda] += TempA[5] * TempB[4];
- C[5+8*n+j*lda] += TempA[5] * TempB[5];
- C[6+8*n+j*lda] += TempA[5] * TempB[6];
- C[7+8*n+j*lda] += TempA[5] * TempB[7];
-
-
-
- TempB[0] = B[(6+8*m)*lda+0+8*n];
- TempB[1] = B[(6+8*m)*lda+1+8*n];
- TempB[2] = B[(6+8*m)*lda+2+8*n];
- TempB[3] = B[(6+8*m)*lda+3+8*n];
- TempB[4] = B[(6+8*m)*lda+4+8*n];
- TempB[5] = B[(6+8*m)*lda+5+8*n];
- TempB[6] = B[(6+8*m)*lda+6+8*n];
- TempB[7] = B[(6+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[6] * TempB[0];
- C[1+8*n+j*lda] += TempA[6] * TempB[1];
- C[2+8*n+j*lda] += TempA[6] * TempB[2];
- C[3+8*n+j*lda] += TempA[6] * TempB[3];
- C[4+8*n+j*lda] += TempA[6] * TempB[4];
- C[5+8*n+j*lda] += TempA[6] * TempB[5];
- C[6+8*n+j*lda] += TempA[6] * TempB[6];
- C[7+8*n+j*lda] += TempA[6] * TempB[7];
-
-
- TempB[0] = B[(7+8*m)*lda+0+8*n];
- TempB[1] = B[(7+8*m)*lda+1+8*n];
- TempB[2] = B[(7+8*m)*lda+2+8*n];
- TempB[3] = B[(7+8*m)*lda+3+8*n];
- TempB[4] = B[(7+8*m)*lda+4+8*n];
- TempB[5] = B[(7+8*m)*lda+5+8*n];
- TempB[6] = B[(7+8*m)*lda+6+8*n];
- TempB[7] = B[(7+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[7] * TempB[0];
- C[1+8*n+j*lda] += TempA[7] * TempB[1];
- C[2+8*n+j*lda] += TempA[7] * TempB[2];
- C[3+8*n+j*lda] += TempA[7] * TempB[3];
- C[4+8*n+j*lda] += TempA[7] * TempB[4];
- C[5+8*n+j*lda] += TempA[7] * TempB[5];
- C[6+8*n+j*lda] += TempA[7] * TempB[6];
- C[7+8*n+j*lda] += TempA[7] * TempB[7];
- }
-
- }
- }
- }
- if(coreid ==0)
- {
- for ( j = 0; j < 16; j++ )
- {
-
- for ( m = 0; m < 4; m++ )
- {
-
- TempA[0] = A[j*lda+0+8*m];
- TempA[1] = A[j*lda+1+8*m];
- TempA[2] = A[j*lda+2+8*m];
- TempA[3] = A[j*lda+3+8*m];
- TempA[4] = A[j*lda+4+8*m];
- TempA[5] = A[j*lda+5+8*m];
- TempA[6] = A[j*lda+6+8*m];
- TempA[7] = A[j*lda+7+8*m];
-
- for( n = 0; n < 4; n++)
- {
- /*
- i = j*lda;
-
- TempB[0] = B[(0+8*m)*lda+0+8*n];
- TempB[1] = B[(0+8*m)*lda+1+8*n];
- TempB[2] = B[(0+8*m)*lda+2+8*n];
- TempB[3] = B[(0+8*m)*lda+3+8*n];
- TempB[4] = B[(0+8*m)*lda+4+8*n];
- TempB[5] = B[(0+8*m)*lda+5+8*n];
- TempB[6] = B[(0+8*m)*lda+6+8*n];
- TempB[7] = B[(0+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[0] * TempB[0];
- C[1+8*n+i] += TempA[0] * TempB[1];
- C[2+8*n+i] += TempA[0] * TempB[2];
- C[3+8*n+i] += TempA[0] * TempB[3];
- C[4+8*n+i] += TempA[0] * TempB[4];
- C[5+8*n+i] += TempA[0] * TempB[5];
- C[6+8*n+i] += TempA[0] * TempB[6];
- C[7+8*n+i] += TempA[0] * TempB[7];
-
-
-
- TempB[0] = B[(1+8*m)*lda+0+8*n];
- TempB[1] = B[(1+8*m)*lda+1+8*n];
- TempB[2] = B[(1+8*m)*lda+2+8*n];
- TempB[3] = B[(1+8*m)*lda+3+8*n];
- TempB[4] = B[(1+8*m)*lda+4+8*n];
- TempB[5] = B[(1+8*m)*lda+5+8*n];
- TempB[6] = B[(1+8*m)*lda+6+8*n];
- TempB[7] = B[(1+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[1] * TempB[0];
- C[1+8*n+i] += TempA[1] * TempB[1];
- C[2+8*n+i] += TempA[1] * TempB[2];
- C[3+8*n+i] += TempA[1] * TempB[3];
- C[4+8*n+i] += TempA[1] * TempB[4];
- C[5+8*n+i] += TempA[1] * TempB[5];
- C[6+8*n+i] += TempA[1] * TempB[6];
- C[7+8*n+i] += TempA[1] * TempB[7];
-
-
-
- TempB[0] = B[(2+8*m)*lda+0+8*n];
- TempB[1] = B[(2+8*m)*lda+1+8*n];
- TempB[2] = B[(2+8*m)*lda+2+8*n];
- TempB[3] = B[(2+8*m)*lda+3+8*n];
- TempB[4] = B[(2+8*m)*lda+4+8*n];
- TempB[5] = B[(2+8*m)*lda+5+8*n];
- TempB[6] = B[(2+8*m)*lda+6+8*n];
- TempB[7] = B[(2+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[2] * TempB[0];
- C[1+8*n+i] += TempA[2] * TempB[1];
- C[2+8*n+i] += TempA[2] * TempB[2];
- C[3+8*n+i] += TempA[2] * TempB[3];
- C[4+8*n+i] += TempA[2] * TempB[4];
- C[5+8*n+i] += TempA[2] * TempB[5];
- C[6+8*n+i] += TempA[2] * TempB[6];
- C[7+8*n+i] += TempA[2] * TempB[7];
-
-
-
- TempB[0] = B[(3+8*m)*lda+0+8*n];
- TempB[1] = B[(3+8*m)*lda+1+8*n];
- TempB[2] = B[(3+8*m)*lda+2+8*n];
- TempB[3] = B[(3+8*m)*lda+3+8*n];
- TempB[4] = B[(3+8*m)*lda+4+8*n];
- TempB[5] = B[(3+8*m)*lda+5+8*n];
- TempB[6] = B[(3+8*m)*lda+6+8*n];
- TempB[7] = B[(3+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[3] * TempB[0];
- C[1+8*n+i] += TempA[3] * TempB[1];
- C[2+8*n+i] += TempA[3] * TempB[2];
- C[3+8*n+i] += TempA[3] * TempB[3];
- C[4+8*n+i] += TempA[3] * TempB[4];
- C[5+8*n+i] += TempA[3] * TempB[5];
- C[6+8*n+i] += TempA[3] * TempB[6];
- C[7+8*n+i] += TempA[3] * TempB[7];
-
-
- TempB[0] = B[(4+8*m)*lda+0+8*n];
- TempB[1] = B[(4+8*m)*lda+1+8*n];
- TempB[2] = B[(4+8*m)*lda+2+8*n];
- TempB[3] = B[(4+8*m)*lda+3+8*n];
- TempB[4] = B[(4+8*m)*lda+4+8*n];
- TempB[5] = B[(4+8*m)*lda+5+8*n];
- TempB[6] = B[(4+8*m)*lda+6+8*n];
- TempB[7] = B[(4+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[4] * TempB[0];
- C[1+8*n+i] += TempA[4] * TempB[1];
- C[2+8*n+i] += TempA[4] * TempB[2];
- C[3+8*n+i] += TempA[4] * TempB[3];
- C[4+8*n+i] += TempA[4] * TempB[4];
- C[5+8*n+i] += TempA[4] * TempB[5];
- C[6+8*n+i] += TempA[4] * TempB[6];
- C[7+8*n+i] += TempA[4] * TempB[7];
-
-
-
- TempB[0] = B[(5+8*m)*lda+0+8*n];
- TempB[1] = B[(5+8*m)*lda+1+8*n];
- TempB[2] = B[(5+8*m)*lda+2+8*n];
- TempB[3] = B[(5+8*m)*lda+3+8*n];
- TempB[4] = B[(5+8*m)*lda+4+8*n];
- TempB[5] = B[(5+8*m)*lda+5+8*n];
- TempB[6] = B[(5+8*m)*lda+6+8*n];
- TempB[7] = B[(5+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[5] * TempB[0];
- C[1+8*n+i] += TempA[5] * TempB[1];
- C[2+8*n+i] += TempA[5] * TempB[2];
- C[3+8*n+i] += TempA[5] * TempB[3];
- C[4+8*n+i] += TempA[5] * TempB[4];
- C[5+8*n+i] += TempA[5] * TempB[5];
- C[6+8*n+i] += TempA[5] * TempB[6];
- C[7+8*n+i] += TempA[5] * TempB[7];
-
-
-
- TempB[0] = B[(6+8*m)*lda+0+8*n];
- TempB[1] = B[(6+8*m)*lda+1+8*n];
- TempB[2] = B[(6+8*m)*lda+2+8*n];
- TempB[3] = B[(6+8*m)*lda+3+8*n];
- TempB[4] = B[(6+8*m)*lda+4+8*n];
- TempB[5] = B[(6+8*m)*lda+5+8*n];
- TempB[6] = B[(6+8*m)*lda+6+8*n];
- TempB[7] = B[(6+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[6] * TempB[0];
- C[1+8*n+i] += TempA[6] * TempB[1];
- C[2+8*n+i] += TempA[6] * TempB[2];
- C[3+8*n+i] += TempA[6] * TempB[3];
- C[4+8*n+i] += TempA[6] * TempB[4];
- C[5+8*n+i] += TempA[6] * TempB[5];
- C[6+8*n+i] += TempA[6] * TempB[6];
- C[7+8*n+i] += TempA[6] * TempB[7];
-
-
- TempB[0] = B[(7+8*m)*lda+0+8*n];
- TempB[1] = B[(7+8*m)*lda+1+8*n];
- TempB[2] = B[(7+8*m)*lda+2+8*n];
- TempB[3] = B[(7+8*m)*lda+3+8*n];
- TempB[4] = B[(7+8*m)*lda+4+8*n];
- TempB[5] = B[(7+8*m)*lda+5+8*n];
- TempB[6] = B[(7+8*m)*lda+6+8*n];
- TempB[7] = B[(7+8*m)*lda+7+8*n];
-
- C[0+8*n+i] += TempA[7] * TempB[0];
- C[1+8*n+i] += TempA[7] * TempB[1];
- C[2+8*n+i] += TempA[7] * TempB[2];
- C[3+8*n+i] += TempA[7] * TempB[3];
- C[4+8*n+i] += TempA[7] * TempB[4];
- C[5+8*n+i] += TempA[7] * TempB[5];
- C[6+8*n+i] += TempA[7] * TempB[6];
- C[7+8*n+i] += TempA[7] * TempB[7];
-
- */
- TempB[0] = B[(0+8*m)*lda+0+8*n];
- TempB[1] = B[(0+8*m)*lda+1+8*n];
- TempB[2] = B[(0+8*m)*lda+2+8*n];
- TempB[3] = B[(0+8*m)*lda+3+8*n];
- TempB[4] = B[(0+8*m)*lda+4+8*n];
- TempB[5] = B[(0+8*m)*lda+5+8*n];
- TempB[6] = B[(0+8*m)*lda+6+8*n];
- TempB[7] = B[(0+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[0] * TempB[0];
- C[1+8*n+j*lda] += TempA[0] * TempB[1];
- C[2+8*n+j*lda] += TempA[0] * TempB[2];
- C[3+8*n+j*lda] += TempA[0] * TempB[3];
- C[4+8*n+j*lda] += TempA[0] * TempB[4];
- C[5+8*n+j*lda] += TempA[0] * TempB[5];
- C[6+8*n+j*lda] += TempA[0] * TempB[6];
- C[7+8*n+j*lda] += TempA[0] * TempB[7];
-
-
-
- TempB[0] = B[(1+8*m)*lda+0+8*n];
- TempB[1] = B[(1+8*m)*lda+1+8*n];
- TempB[2] = B[(1+8*m)*lda+2+8*n];
- TempB[3] = B[(1+8*m)*lda+3+8*n];
- TempB[4] = B[(1+8*m)*lda+4+8*n];
- TempB[5] = B[(1+8*m)*lda+5+8*n];
- TempB[6] = B[(1+8*m)*lda+6+8*n];
- TempB[7] = B[(1+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[1] * TempB[0];
- C[1+8*n+j*lda] += TempA[1] * TempB[1];
- C[2+8*n+j*lda] += TempA[1] * TempB[2];
- C[3+8*n+j*lda] += TempA[1] * TempB[3];
- C[4+8*n+j*lda] += TempA[1] * TempB[4];
- C[5+8*n+j*lda] += TempA[1] * TempB[5];
- C[6+8*n+j*lda] += TempA[1] * TempB[6];
- C[7+8*n+j*lda] += TempA[1] * TempB[7];
-
-
-
- TempB[0] = B[(2+8*m)*lda+0+8*n];
- TempB[1] = B[(2+8*m)*lda+1+8*n];
- TempB[2] = B[(2+8*m)*lda+2+8*n];
- TempB[3] = B[(2+8*m)*lda+3+8*n];
- TempB[4] = B[(2+8*m)*lda+4+8*n];
- TempB[5] = B[(2+8*m)*lda+5+8*n];
- TempB[6] = B[(2+8*m)*lda+6+8*n];
- TempB[7] = B[(2+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[2] * TempB[0];
- C[1+8*n+j*lda] += TempA[2] * TempB[1];
- C[2+8*n+j*lda] += TempA[2] * TempB[2];
- C[3+8*n+j*lda] += TempA[2] * TempB[3];
- C[4+8*n+j*lda] += TempA[2] * TempB[4];
- C[5+8*n+j*lda] += TempA[2] * TempB[5];
- C[6+8*n+j*lda] += TempA[2] * TempB[6];
- C[7+8*n+j*lda] += TempA[2] * TempB[7];
-
-
-
- TempB[0] = B[(3+8*m)*lda+0+8*n];
- TempB[1] = B[(3+8*m)*lda+1+8*n];
- TempB[2] = B[(3+8*m)*lda+2+8*n];
- TempB[3] = B[(3+8*m)*lda+3+8*n];
- TempB[4] = B[(3+8*m)*lda+4+8*n];
- TempB[5] = B[(3+8*m)*lda+5+8*n];
- TempB[6] = B[(3+8*m)*lda+6+8*n];
- TempB[7] = B[(3+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[3] * TempB[0];
- C[1+8*n+j*lda] += TempA[3] * TempB[1];
- C[2+8*n+j*lda] += TempA[3] * TempB[2];
- C[3+8*n+j*lda] += TempA[3] * TempB[3];
- C[4+8*n+j*lda] += TempA[3] * TempB[4];
- C[5+8*n+j*lda] += TempA[3] * TempB[5];
- C[6+8*n+j*lda] += TempA[3] * TempB[6];
- C[7+8*n+j*lda] += TempA[3] * TempB[7];
-
-
- TempB[0] = B[(4+8*m)*lda+0+8*n];
- TempB[1] = B[(4+8*m)*lda+1+8*n];
- TempB[2] = B[(4+8*m)*lda+2+8*n];
- TempB[3] = B[(4+8*m)*lda+3+8*n];
- TempB[4] = B[(4+8*m)*lda+4+8*n];
- TempB[5] = B[(4+8*m)*lda+5+8*n];
- TempB[6] = B[(4+8*m)*lda+6+8*n];
- TempB[7] = B[(4+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[4] * TempB[0];
- C[1+8*n+j*lda] += TempA[4] * TempB[1];
- C[2+8*n+j*lda] += TempA[4] * TempB[2];
- C[3+8*n+j*lda] += TempA[4] * TempB[3];
- C[4+8*n+j*lda] += TempA[4] * TempB[4];
- C[5+8*n+j*lda] += TempA[4] * TempB[5];
- C[6+8*n+j*lda] += TempA[4] * TempB[6];
- C[7+8*n+j*lda] += TempA[4] * TempB[7];
-
-
-
- TempB[0] = B[(5+8*m)*lda+0+8*n];
- TempB[1] = B[(5+8*m)*lda+1+8*n];
- TempB[2] = B[(5+8*m)*lda+2+8*n];
- TempB[3] = B[(5+8*m)*lda+3+8*n];
- TempB[4] = B[(5+8*m)*lda+4+8*n];
- TempB[5] = B[(5+8*m)*lda+5+8*n];
- TempB[6] = B[(5+8*m)*lda+6+8*n];
- TempB[7] = B[(5+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[5] * TempB[0];
- C[1+8*n+j*lda] += TempA[5] * TempB[1];
- C[2+8*n+j*lda] += TempA[5] * TempB[2];
- C[3+8*n+j*lda] += TempA[5] * TempB[3];
- C[4+8*n+j*lda] += TempA[5] * TempB[4];
- C[5+8*n+j*lda] += TempA[5] * TempB[5];
- C[6+8*n+j*lda] += TempA[5] * TempB[6];
- C[7+8*n+j*lda] += TempA[5] * TempB[7];
-
-
-
- TempB[0] = B[(6+8*m)*lda+0+8*n];
- TempB[1] = B[(6+8*m)*lda+1+8*n];
- TempB[2] = B[(6+8*m)*lda+2+8*n];
- TempB[3] = B[(6+8*m)*lda+3+8*n];
- TempB[4] = B[(6+8*m)*lda+4+8*n];
- TempB[5] = B[(6+8*m)*lda+5+8*n];
- TempB[6] = B[(6+8*m)*lda+6+8*n];
- TempB[7] = B[(6+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[6] * TempB[0];
- C[1+8*n+j*lda] += TempA[6] * TempB[1];
- C[2+8*n+j*lda] += TempA[6] * TempB[2];
- C[3+8*n+j*lda] += TempA[6] * TempB[3];
- C[4+8*n+j*lda] += TempA[6] * TempB[4];
- C[5+8*n+j*lda] += TempA[6] * TempB[5];
- C[6+8*n+j*lda] += TempA[6] * TempB[6];
- C[7+8*n+j*lda] += TempA[6] * TempB[7];
-
-
- TempB[0] = B[(7+8*m)*lda+0+8*n];
- TempB[1] = B[(7+8*m)*lda+1+8*n];
- TempB[2] = B[(7+8*m)*lda+2+8*n];
- TempB[3] = B[(7+8*m)*lda+3+8*n];
- TempB[4] = B[(7+8*m)*lda+4+8*n];
- TempB[5] = B[(7+8*m)*lda+5+8*n];
- TempB[6] = B[(7+8*m)*lda+6+8*n];
- TempB[7] = B[(7+8*m)*lda+7+8*n];
-
- C[0+8*n+j*lda] += TempA[7] * TempB[0];
- C[1+8*n+j*lda] += TempA[7] * TempB[1];
- C[2+8*n+j*lda] += TempA[7] * TempB[2];
- C[3+8*n+j*lda] += TempA[7] * TempB[3];
- C[4+8*n+j*lda] += TempA[7] * TempB[4];
- C[5+8*n+j*lda] += TempA[7] * TempB[5];
- C[6+8*n+j*lda] += TempA[7] * TempB[6];
- C[7+8*n+j*lda] += TempA[7] * TempB[7];
- }
-
- }
- }
- }
-
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
- /*
-
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-
- */
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
-
- size_t i;
- if(coreid ==0)
- {
- for (i = coreid; i < n-3; i+=4)
- {
- x[i] = x[i] + y[i];
- x[i+1] = x[i+1] + y[i+1];
- }
- i = i + 4;
-
-
- for (i; i < (n+1); i+=1)
- {
- x[i] = x[i] + y[i];
- }
-
-
- }
- if(coreid ==1)
- {
- for (i = 2; i < n; i+=4)
- {
- x[i] = x[i] + y[i];
- x[i+1] = x[i+1] + y[i+1];
-
- }
-
-
- }
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"\r
+//--------------------------------------------------------------------------\r
+// Blocked matrix multiply, C = A * B, for row-major matrices.\r
+//\r
+// The index arithmetic below hard-codes a row stride of 32 (shift by 5,\r
+// modulo 32) and a 512-element slice per core (shift by 9), so the routine\r
+// presumably expects lda == 32, i.e. 1024 elements per matrix.\r
+//\r
+//   coreid - index of the calling core; cores with coreid > 1 do no work.\r
+//   ncores - number of cores taking part. With one core, core 0 computes\r
+//            all 1024 outputs; otherwise cores 0 and 1 each compute one\r
+//            contiguous 512-element half of C.\r
+//   lda    - matrix dimension; only bounds the inner dot-product loop.\r
+//   A, B   - input matrices (read only).\r
+//   C      - output matrix; each computed element is overwritten, not\r
+//            accumulated into.\r
+//\r
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )\r
+{\r
+   if(coreid > 1) return;\r
+\r
+   // Scratch state lives in thread-local statics, as in the original code.\r
+   // Only the variables that are actually used are declared.\r
+   static __thread int i, j;\r
+   static __thread data_t tempA0, tempA1;\r
+   static __thread data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7;\r
+\r
+   static __thread int start, end, jStride, jToRow, jToCol;\r
+\r
+   // Flat range of C elements owned by this core (512 per core, or all\r
+   // 1024 when running on a single core).\r
+   start = coreid << 9;\r
+   end = ((ncores == 1) ? 2 : (coreid+1) ) << 9;\r
+   jStride = 8;\r
+\r
+   // Each outer iteration produces 8 consecutive elements of one row of C.\r
+   for (j=start; j < end; j+=jStride) {\r
+      jToRow = (j>>5)<<5;   // flat offset of the start of the current row\r
+      jToCol = j%32;        // column of the first of the 8 outputs\r
+      tempC0 = 0;\r
+      tempC1 = 0;\r
+      tempC2 = 0;\r
+      tempC3 = 0;\r
+      tempC4 = 0;\r
+      tempC5 = 0;\r
+      tempC6 = 0;\r
+      tempC7 = 0;\r
+      // Dot product unrolled by two along the shared dimension: each A\r
+      // element is loaded once and reused for all 8 output columns.\r
+      for ( i=0; i < lda; i+=2 ) {\r
+         tempA0 = A[i   + jToRow];\r
+         tempA1 = A[i+1 + jToRow];\r
+         tempC0 += tempA0 * B[(jToCol   ) + (i<<5)];\r
+         tempC1 += tempA0 * B[(jToCol+1 ) + (i<<5)];\r
+         tempC2 += tempA0 * B[(jToCol+2 ) + (i<<5)];\r
+         tempC3 += tempA0 * B[(jToCol+3 ) + (i<<5)];\r
+         tempC4 += tempA0 * B[(jToCol+4 ) + (i<<5)];\r
+         tempC5 += tempA0 * B[(jToCol+5 ) + (i<<5)];\r
+         tempC6 += tempA0 * B[(jToCol+6 ) + (i<<5)];\r
+         tempC7 += tempA0 * B[(jToCol+7 ) + (i<<5)];\r
+         tempC0 += tempA1 * B[(jToCol   ) + ((i+1)<<5)];\r
+         tempC1 += tempA1 * B[(jToCol+1 ) + ((i+1)<<5)];\r
+         tempC2 += tempA1 * B[(jToCol+2 ) + ((i+1)<<5)];\r
+         tempC3 += tempA1 * B[(jToCol+3 ) + ((i+1)<<5)];\r
+         tempC4 += tempA1 * B[(jToCol+4 ) + ((i+1)<<5)];\r
+         tempC5 += tempA1 * B[(jToCol+5 ) + ((i+1)<<5)];\r
+         tempC6 += tempA1 * B[(jToCol+6 ) + ((i+1)<<5)];\r
+         tempC7 += tempA1 * B[(jToCol+7 ) + ((i+1)<<5)];\r
+      }\r
+      // Write the 8 finished outputs back in one go.\r
+      C[j    ] = tempC0;\r
+      C[j + 1] = tempC1;\r
+      C[j + 2] = tempC2;\r
+      C[j + 3] = tempC3;\r
+      C[j + 4] = tempC4;\r
+      C[j + 5] = tempC5;\r
+      C[j + 6] = tempC6;\r
+      C[j + 7] = tempC7;\r
+   }\r
+}\r
+++ /dev/null
-//**************************************************************************\r
-// Multi-threaded Matrix Multiply benchmark\r
-//--------------------------------------------------------------------------\r
-// TA : Christopher Celio\r
-// Student: \r
-//\r
-//\r
-// This benchmark multiplies two 2-D arrays together and writes the results to\r
-// a third vector. The input data (and reference data) should be generated\r
-// using the matmul_gendata.pl perl script and dumped to a file named\r
-// dataset.h. \r
-\r
-\r
-// print out arrays, etc.\r
-//#define DEBUG\r
-\r
-//--------------------------------------------------------------------------\r
-// Includes \r
-\r
-#include <string.h>\r
-#include <stdlib.h>\r
-#include <stdio.h>\r
-\r
-\r
-//--------------------------------------------------------------------------\r
-// Input/Reference Data\r
-\r
-typedef float data_t;\r
-#include "dataset.h"\r
- \r
- \r
-//--------------------------------------------------------------------------\r
-// Basic Utilities and Multi-thread Support\r
-\r
-__thread unsigned long coreid;\r
-unsigned long ncores;\r
-\r
-#include "util.h"\r
- \r
-#define stringify_1(s) #s\r
-#define stringify(s) stringify_1(s)\r
-#define stats(code) do { \\r
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \\r
- code; \\r
- _c += rdcycle(), _i += rdinstret(); \\r
- if (coreid == 0) \\r
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \\r
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \\r
- } while(0)\r
- \r
-\r
-//--------------------------------------------------------------------------\r
-// Helper functions\r
- \r
-void printArrayMT( char name[], int n, data_t arr[] )\r
-{\r
- int i;\r
- if (coreid != 0)\r
- return;\r
- \r
- printf( " %10s :", name );\r
- for ( i = 0; i < n; i++ )\r
- printf( " %3ld ", (long) arr[i] );\r
- printf( "\n" );\r
-}\r
- \r
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)\r
-{\r
- if (coreid != 0)\r
- return;\r
-\r
- size_t i;\r
- for (i = 0; i < n; i++)\r
- {\r
- if (test[i] != correct[i])\r
- {\r
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", \r
- i, (long)test[i], i, (long)correct[i]);\r
- exit(-1);\r
- }\r
- }\r
- \r
- return;\r
-}\r
- \r
-//--------------------------------------------------------------------------\r
-// matmul function\r
- \r
-// single-thread, naive version\r
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )\r
-{\r
- int i, j, k;\r
-\r
- if (coreid > 0)\r
- return;\r
- \r
- for ( i = 0; i < lda; i++ )\r
- for ( j = 0; j < lda; j++ ) \r
- {\r
- for ( k = 0; k < lda; k++ ) \r
- {\r
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];\r
- }\r
- }\r
-\r
-}\r
- \r
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )\r
-{\r
- static __thread int i, j, k;\r
- static __thread data_t tempA0, tempA1, tempA2, tempA3, tempA4, tempA5, tempA6, tempA7;\r
- static __thread data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7, tempC8, tempC9, tempC10, tempC11, tempC12, tempC13, tempC14, tempC15;\r
-\r
- static __thread int start, end, jStride, jToRow, jToCol;\r
- \r
- start = coreid << 9;\r
- end = (coreid+1) << 9;\r
- jStride = 8;\r
-\r
- for (j=start; j < end; j+=jStride) {\r
- jToRow = (j>>5)<<5;\r
- jToCol = j%32;\r
- tempC0 = 0;\r
- tempC1 = 0;\r
- tempC2 = 0;\r
- tempC3 = 0;\r
- tempC4 = 0;\r
- tempC5 = 0;\r
- tempC6 = 0;\r
- tempC7 = 0;\r
- for ( i=0; i < lda; i+=2 ) {\r
- tempA0 = A[i + jToRow];\r
- tempA1 = A[i+1 + jToRow];\r
- tempC0 += tempA0 * B[(jToCol ) + (i<<5)];\r
- tempC1 += tempA0 * B[(jToCol+1 ) + (i<<5)];\r
- tempC2 += tempA0 * B[(jToCol+2 ) + (i<<5)];\r
- tempC3 += tempA0 * B[(jToCol+3 ) + (i<<5)];\r
- tempC4 += tempA0 * B[(jToCol+4 ) + (i<<5)];\r
- tempC5 += tempA0 * B[(jToCol+5 ) + (i<<5)];\r
- tempC6 += tempA0 * B[(jToCol+6 ) + (i<<5)];\r
- tempC7 += tempA0 * B[(jToCol+7 ) + (i<<5)];\r
- tempC0 += tempA1 * B[(jToCol ) + ((i+1)<<5)];\r
- tempC1 += tempA1 * B[(jToCol+1 ) + ((i+1)<<5)];\r
- tempC2 += tempA1 * B[(jToCol+2 ) + ((i+1)<<5)];\r
- tempC3 += tempA1 * B[(jToCol+3 ) + ((i+1)<<5)];\r
- tempC4 += tempA1 * B[(jToCol+4 ) + ((i+1)<<5)];\r
- tempC5 += tempA1 * B[(jToCol+5 ) + ((i+1)<<5)];\r
- tempC6 += tempA1 * B[(jToCol+6 ) + ((i+1)<<5)];\r
- tempC7 += tempA1 * B[(jToCol+7 ) + ((i+1)<<5)];\r
- }\r
- C[j] =tempC0;\r
- C[j + 1 ]=tempC1;\r
- C[j + 2 ]=tempC2;\r
- C[j + 3 ]=tempC3;\r
- C[j + 4 ]=tempC4;\r
- C[j + 5 ]=tempC5;\r
- C[j + 6 ]=tempC6;\r
- C[j + 7 ]=tempC7;\r
- }\r
- \r
-}\r
-\r
-//--------------------------------------------------------------------------\r
-// Main\r
-//\r
-// all threads start executing thread_entry(). Use their "coreid" to\r
-// differentiate between threads (each thread is running on a separate core).\r
- \r
-void thread_entry(int cid, int nc)\r
-{\r
- coreid = cid;\r
- ncores = nc;\r
-\r
- // static allocates data in the binary, which is visible to both threads\r
- static data_t results_data[ARRAY_SIZE];\r
-\r
-\r
- //// Execute the provided, naive matmul\r
- //barrier(nc);\r
- //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));\r
- \r
- //\r
- //// verify\r
- //verifyMT(ARRAY_SIZE, results_data, verify_data);\r
- //\r
- //// clear results from the first trial\r
- //size_t i;\r
- //if (coreid == 0) \r
- // for (i=0; i < ARRAY_SIZE; i++)\r
- // results_data[i] = 0;\r
- //barrier(nc);\r
-\r
- \r
- // Execute your faster matmul\r
- barrier(nc);\r
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));\r
- \r
-#ifdef DEBUG\r
- printArrayMT("results:", ARRAY_SIZE, results_data);\r
- printArrayMT("verify :", ARRAY_SIZE, verify_data);\r
-#endif\r
- \r
- // verify\r
- verifyMT(ARRAY_SIZE, results_data, verify_data);\r
- barrier(nc);\r
-\r
- exit(0);\r
-}\r
-\r
-\r
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************\r
-// Multi-threaded Matrix Multiply benchmark\r
-//--------------------------------------------------------------------------\r
-// TA : Christopher Celio\r
-// Student: \r
-//\r
-//\r
-// This benchmark multiplies two 2-D arrays together and writes the results to\r
-// a third vector. The input data (and reference data) should be generated\r
-// using the matmul_gendata.pl perl script and dumped to a file named\r
-// dataset.h. \r
-\r
-\r
-// print out arrays, etc.\r
-//#define DEBUG\r
-\r
-//--------------------------------------------------------------------------\r
-// Includes \r
-\r
-#include <string.h>\r
-#include <stdlib.h>\r
-#include <stdio.h>\r
-\r
-\r
-//--------------------------------------------------------------------------\r
-// Input/Reference Data\r
-\r
-typedef float data_t;\r
-#include "dataset.h"\r
- \r
- \r
-//--------------------------------------------------------------------------\r
-// Basic Utilities and Multi-thread Support\r
-\r
-__thread unsigned long coreid;\r
-unsigned long ncores;\r
-\r
-#include "util.h"\r
- \r
-#define stringify_1(s) #s\r
-#define stringify(s) stringify_1(s)\r
-#define stats(code) do { \\r
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \\r
- code; \\r
- _c += rdcycle(), _i += rdinstret(); \\r
- if (coreid == 0) \\r
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \\r
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \\r
- } while(0)\r
- \r
-\r
-//--------------------------------------------------------------------------\r
-// Helper functions\r
- \r
-void printArrayMT( char name[], int n, data_t arr[] )\r
-{\r
- int i;\r
- if (coreid != 0)\r
- return;\r
- \r
- printf( " %10s :", name );\r
- for ( i = 0; i < n; i++ )\r
- printf( " %3ld ", (long) arr[i] );\r
- printf( "\n" );\r
-}\r
- \r
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)\r
-{\r
- if (coreid != 0)\r
- return;\r
-\r
- size_t i;\r
- for (i = 0; i < n; i++)\r
- {\r
- if (test[i] != correct[i])\r
- {\r
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", \r
- i, (long)test[i], i, (long)correct[i]);\r
- exit(-1);\r
- }\r
- }\r
- \r
- return;\r
-}\r
- \r
-//--------------------------------------------------------------------------\r
-// matmul function\r
- \r
-// single-thread, naive version\r
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )\r
-{\r
- int i, j, k;\r
-\r
- if (coreid > 0)\r
- return;\r
- \r
- for ( i = 0; i < lda; i++ )\r
- for ( j = 0; j < lda; j++ ) \r
- {\r
- for ( k = 0; k < lda; k++ ) \r
- {\r
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];\r
- }\r
- }\r
-\r
-}\r
- \r
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )\r
-{\r
- static __thread int i, j, k;\r
- static __thread data_t tempA0, tempA1, tempA2, tempA3, tempA4, tempA5, tempA6, tempA7;\r
- static __thread data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7, tempC8, tempC9, tempC10, tempC11, tempC12, tempC13, tempC14, tempC15;\r
-\r
- static __thread int start, end, jStride, jToRow, jToCol;\r
- static data_t A1[1024], B1[1024];;\r
- \r
- start = coreid << 9;\r
- end = (coreid+1) << 9;\r
- jStride = 8;\r
-\r
- if (coreid == 0) { \r
- for (j=start; j < end; j+=jStride) {\r
- jToRow = (j>>5)<<5;\r
- jToCol = j%32;\r
- tempC0 = 0;\r
- tempC1 = 0;\r
- tempC2 = 0;\r
- tempC3 = 0;\r
- tempC4 = 0;\r
- tempC5 = 0;\r
- tempC6 = 0;\r
- tempC7 = 0;\r
- for ( i=0; i < lda; i+=2 ) {\r
- tempA0 = A[i + jToRow];\r
- tempA1 = A[i+1 + jToRow];\r
- tempC0 += tempA0 * B[(jToCol ) + (i<<5)];\r
- tempC1 += tempA0 * B[(jToCol+1 ) + (i<<5)];\r
- tempC2 += tempA0 * B[(jToCol+2 ) + (i<<5)];\r
- tempC3 += tempA0 * B[(jToCol+3 ) + (i<<5)];\r
- tempC4 += tempA0 * B[(jToCol+4 ) + (i<<5)];\r
- tempC5 += tempA0 * B[(jToCol+5 ) + (i<<5)];\r
- tempC6 += tempA0 * B[(jToCol+6 ) + (i<<5)];\r
- tempC7 += tempA0 * B[(jToCol+7 ) + (i<<5)];\r
- tempC0 += tempA1 * B[(jToCol ) + ((i+1)<<5)];\r
- tempC1 += tempA1 * B[(jToCol+1 ) + ((i+1)<<5)];\r
- tempC2 += tempA1 * B[(jToCol+2 ) + ((i+1)<<5)];\r
- tempC3 += tempA1 * B[(jToCol+3 ) + ((i+1)<<5)];\r
- tempC4 += tempA1 * B[(jToCol+4 ) + ((i+1)<<5)];\r
- tempC5 += tempA1 * B[(jToCol+5 ) + ((i+1)<<5)];\r
- tempC6 += tempA1 * B[(jToCol+6 ) + ((i+1)<<5)];\r
- tempC7 += tempA1 * B[(jToCol+7 ) + ((i+1)<<5)];\r
- }\r
- C[j] =tempC0;\r
- C[j + 1 ]=tempC1;\r
- C[j + 2 ]=tempC2;\r
- C[j + 3 ]=tempC3;\r
- C[j + 4 ]=tempC4;\r
- C[j + 5 ]=tempC5;\r
- C[j + 6 ]=tempC6;\r
- C[j + 7 ]=tempC7;\r
- }\r
- }\r
- else { \r
- for (i = 0; i < 1024; i++) {\r
- A1[i] = A[i];\r
- B1[i] = B[i];\r
- }\r
- for (j=start; j < end; j+=jStride) {\r
- jToRow = (j>>5)<<5;\r
- jToCol = j%32;\r
- tempC0 = 0;\r
- tempC1 = 0;\r
- tempC2 = 0;\r
- tempC3 = 0;\r
- tempC4 = 0;\r
- tempC5 = 0;\r
- tempC6 = 0;\r
- tempC7 = 0;\r
- for ( i=0; i < lda; i+=2 ) {\r
- tempA0 = A1[i + jToRow];\r
- tempA1 = A1[i+1 + jToRow];\r
- tempC0 += tempA0 * B1[(jToCol ) + (i<<5)];\r
- tempC1 += tempA0 * B1[(jToCol+1 ) + (i<<5)];\r
- tempC2 += tempA0 * B1[(jToCol+2 ) + (i<<5)];\r
- tempC3 += tempA0 * B1[(jToCol+3 ) + (i<<5)];\r
- tempC4 += tempA0 * B1[(jToCol+4 ) + (i<<5)];\r
- tempC5 += tempA0 * B1[(jToCol+5 ) + (i<<5)];\r
- tempC6 += tempA0 * B1[(jToCol+6 ) + (i<<5)];\r
- tempC7 += tempA0 * B1[(jToCol+7 ) + (i<<5)];\r
- tempC0 += tempA1 * B1[(jToCol ) + ((i+1)<<5)];\r
- tempC1 += tempA1 * B1[(jToCol+1 ) + ((i+1)<<5)];\r
- tempC2 += tempA1 * B1[(jToCol+2 ) + ((i+1)<<5)];\r
- tempC3 += tempA1 * B1[(jToCol+3 ) + ((i+1)<<5)];\r
- tempC4 += tempA1 * B1[(jToCol+4 ) + ((i+1)<<5)];\r
- tempC5 += tempA1 * B1[(jToCol+5 ) + ((i+1)<<5)];\r
- tempC6 += tempA1 * B1[(jToCol+6 ) + ((i+1)<<5)];\r
- tempC7 += tempA1 * B1[(jToCol+7 ) + ((i+1)<<5)];\r
- }\r
- C[j] =tempC0;\r
- C[j + 1 ]=tempC1;\r
- C[j + 2 ]=tempC2;\r
- C[j + 3 ]=tempC3;\r
- C[j + 4 ]=tempC4;\r
- C[j + 5 ]=tempC5;\r
- C[j + 6 ]=tempC6;\r
- C[j + 7 ]=tempC7;\r
- }\r
- }\r
-}\r
-\r
-//--------------------------------------------------------------------------\r
-// Main\r
-//\r
-// all threads start executing thread_entry(). Use their "coreid" to\r
-// differentiate between threads (each thread is running on a separate core).\r
- \r
-void thread_entry(int cid, int nc)\r
-{\r
- coreid = cid;\r
- ncores = nc;\r
-\r
- // static allocates data in the binary, which is visible to both threads\r
- static data_t results_data[ARRAY_SIZE];\r
-\r
-\r
- //// Execute the provided, naive matmul\r
- //barrier(nc);\r
- //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));\r
- \r
- //\r
- //// verify\r
- //verifyMT(ARRAY_SIZE, results_data, verify_data);\r
- //\r
- //// clear results from the first trial\r
- //size_t i;\r
- //if (coreid == 0) \r
- // for (i=0; i < ARRAY_SIZE; i++)\r
- // results_data[i] = 0;\r
- //barrier(nc);\r
-\r
- \r
- // Execute your faster matmul\r
- barrier(nc);\r
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));\r
- \r
-#ifdef DEBUG\r
- printArrayMT("results:", ARRAY_SIZE, results_data);\r
- printArrayMT("verify :", ARRAY_SIZE, verify_data);\r
-#endif\r
- \r
- // verify\r
- verifyMT(ARRAY_SIZE, results_data, verify_data);\r
- barrier(nc);\r
-\r
- exit(0);\r
-}\r
-\r
-\r
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // Each core uses its own block
- if (coreid == 0) {
- for (i = 0; i < (n/2); i++) {
- x[i] = x[i] + y[i];
- }
- }
- else {
- for (i = (n/2); i < n; i++) {
- x[i] = x[i] + y[i];
- }
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+ /*
+  * Hand-unrolled matrix multiply over flat arrays with a hard-coded row
+  * stride of 32 (the <<5, %32 and +=32 below).
+  *
+  * coreid - index of the calling core; cores with coreid > 1 return at once.
+  * ncores - when 1, core 0 covers flat indices 0..1023 of C; otherwise
+  *          core 0 covers 0..511 and core 1 covers 512..1023.
+  * lda    - used only as the bound of the inner i loop, which steps by 2.
+  *          NOTE(review): the indexing looks correct only for lda == 32;
+  *          confirm that no caller passes another size.
+  * A, B   - inputs, only read.
+  * C      - output; every element in this core's range is overwritten
+  *          (assigned, not accumulated into).
+  *
+  * For each flat index j (stepping by 8) and n = 0..7:
+  *   C[j+n] = sum over i of A[jToRow + i] * B[i*32 + jToCol + n]
+  *
+  * All locals are declared static __thread, so they have per-thread static
+  * storage instead of automatic storage.
+  */
+ if(coreid > 1) return; // cores other than 0 and 1 do no work
+ static __thread int i, j, k; // k is not used by the active code
+ static __thread data_t tempA0, tempA1, tempA2, tempA3, tempA4, tempA5, tempA6, tempA7; // only tempA0 and tempA1 are used by the active code
+ static __thread data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7; //tempC8, tempC9, tempC10, tempC11, tempC12, tempC13, tempC14, tempC15;
+
+ static __thread int start, end, jStride, jToRow, jToCol, iToRow;
+ /* Work split: a contiguous range of flat C indices per core. */
+ start = coreid << 9; // coreid * 512
+ end = ((ncores == 1) ? 2 : (coreid+1)) << 9; // 1024 when ncores == 1, otherwise (coreid+1) * 512
+ jStride = 8; // eight consecutive C elements are produced per outer iteration
+
+ for (j=start; j < end; j+=jStride) {
+ jToRow = (j>>5)<<5; // j rounded down to a multiple of 32
+ jToCol = j%32; // offset of j within that group of 32
+ tempC0 = 0;
+ tempC1 = 0;
+ tempC2 = 0;
+ tempC3 = 0;
+ tempC4 = 0;
+ tempC5 = 0;
+ tempC6 = 0;
+ tempC7 = 0;
+ //tempC8 = 0;
+ //tempC9 = 0;
+ //tempC10 = 0;
+ //tempC11 = 0;
+ //tempC12 = 0;
+ //tempC13 = 0;
+ //tempC14 = 0;
+ //tempC15 = 0;
+
+ for ( i=0; i < lda; i+=2 ) { // two values of i are consumed per iteration
+ iToRow = i << 5; // i * 32
+
+ tempA0 = A[i + jToRow];
+ tempA1 = A[i+1 + jToRow];
+ //tempA2 = A[i+2 + jToRow];
+ //tempA3 = A[i+3 + jToRow];
+ //tempA4 = A[i+4 + jToRow];
+ //tempA5 = A[i+5 + jToRow];
+ //tempA6 = A[i+6 + jToRow];
+ //tempA7 = A[i+7 + jToRow];
+ /* Accumulate tempA0 times eight consecutive B elements at offset iToRow. */
+ tempC0 += tempA0 * B[(jToCol ) + (iToRow)];
+ tempC1 += tempA0 * B[(jToCol+1 ) + (iToRow)];
+ tempC2 += tempA0 * B[(jToCol+2 ) + (iToRow)];
+ tempC3 += tempA0 * B[(jToCol+3 ) + (iToRow)];
+ tempC4 += tempA0 * B[(jToCol+4 ) + (iToRow)];
+ tempC5 += tempA0 * B[(jToCol+5 ) + (iToRow)];
+ tempC6 += tempA0 * B[(jToCol+6 ) + (iToRow)];
+ tempC7 += tempA0 * B[(jToCol+7 ) + (iToRow)];
+ //tempC8 += tempA0 * B[(jToCol+8 ) + (iToRow)];
+ //tempC9 += tempA0 * B[(jToCol+9 ) + (iToRow)];
+ //tempC10 += tempA0 * B[(jToCol+10) + (iToRow)];
+ //tempC11 += tempA0 * B[(jToCol+11) + (iToRow)];
+ //tempC12 += tempA0 * B[(jToCol+12) + (iToRow)];
+ //tempC13 += tempA0 * B[(jToCol+13) + (iToRow)];
+ //tempC14 += tempA0 * B[(jToCol+14) + (iToRow)];
+ //tempC15 += tempA0 * B[(jToCol+15) + (iToRow)];
+ /* Same accumulation for tempA1, using the next group of 32 B elements. */
+ iToRow += 32; // now (i+1) * 32
+ tempC0 += tempA1 * B[(jToCol ) + (iToRow)];
+ tempC1 += tempA1 * B[(jToCol+1 ) + (iToRow)];
+ tempC2 += tempA1 * B[(jToCol+2 ) + (iToRow)];
+ tempC3 += tempA1 * B[(jToCol+3 ) + (iToRow)];
+ tempC4 += tempA1 * B[(jToCol+4 ) + (iToRow)];
+ tempC5 += tempA1 * B[(jToCol+5 ) + (iToRow)];
+ tempC6 += tempA1 * B[(jToCol+6 ) + (iToRow)];
+ tempC7 += tempA1 * B[(jToCol+7 ) + (iToRow)];
+ //tempC8 += tempA1 * B[(jToCol+8 ) + (iToRow+32)];
+ //tempC9 += tempA1 * B[(jToCol+9 ) + (iToRow+32)];
+ //tempC10 += tempA1 * B[(jToCol+10) + (iToRow+32)];
+ //tempC11 += tempA1 * B[(jToCol+11) + (iToRow+32)];
+ //tempC12 += tempA1 * B[(jToCol+12) + (iToRow+32)];
+ //tempC13 += tempA1 * B[(jToCol+13) + (iToRow+32)];
+ //tempC14 += tempA1 * B[(jToCol+14) + (iToRow+32)];
+ //tempC15 += tempA1 * B[(jToCol+15) + (iToRow+32)];
+ /* NOTE(review): the disabled tempC8..tempC15 lines just above add +32 to an
+  * iToRow that has already been advanced by 32; re-check before re-enabling.
+  * The remaining disabled groups below use tempA2..tempA7, whose loads are
+  * also commented out; presumably they are deeper unroll variants. */
+ //iToRow += 32;
+ //tempC0 += tempA2 * B[(jToCol ) + (iToRow)];
+ //tempC1 += tempA2 * B[(jToCol+1 ) + (iToRow)];
+ //tempC2 += tempA2 * B[(jToCol+2 ) + (iToRow)];
+ //tempC3 += tempA2 * B[(jToCol+3 ) + (iToRow)];
+ //tempC4 += tempA2 * B[(jToCol+4 ) + (iToRow)];
+ //tempC5 += tempA2 * B[(jToCol+5 ) + (iToRow)];
+ //tempC6 += tempA2 * B[(jToCol+6 ) + (iToRow)];
+ //tempC7 += tempA2 * B[(jToCol+7 ) + (iToRow)];
+ //tempC8 += tempA2 * B[(jToCol+8 ) + (iToRow)];
+ //tempC9 += tempA2 * B[(jToCol+9 ) + (iToRow)];
+ //tempC10 += tempA2 * B[(jToCol+10) + (iToRow)];
+ //tempC11 += tempA2 * B[(jToCol+11) + (iToRow)];
+ //tempC12 += tempA2 * B[(jToCol+12) + (iToRow)];
+ //tempC13 += tempA2 * B[(jToCol+13) + (iToRow)];
+ //tempC14 += tempA2 * B[(jToCol+14) + (iToRow)];
+ //tempC15 += tempA2 * B[(jToCol+15) + (iToRow)];
+
+ //iToRow += 32;
+ //tempC0 += tempA3 * B[(jToCol ) + (iToRow)];
+ //tempC1 += tempA3 * B[(jToCol+1 ) + (iToRow)];
+ //tempC2 += tempA3 * B[(jToCol+2 ) + (iToRow)];
+ //tempC3 += tempA3 * B[(jToCol+3 ) + (iToRow)];
+ //tempC4 += tempA3 * B[(jToCol+4 ) + (iToRow)];
+ //tempC5 += tempA3 * B[(jToCol+5 ) + (iToRow)];
+ //tempC6 += tempA3 * B[(jToCol+6 ) + (iToRow)];
+ //tempC7 += tempA3 * B[(jToCol+7 ) + (iToRow)];
+ //tempC8 += tempA3 * B[(jToCol+8 ) + (iToRow)];
+ //tempC9 += tempA3 * B[(jToCol+9 ) + (iToRow)];
+ //tempC10 += tempA3 * B[(jToCol+10) + (iToRow)];
+ //tempC11 += tempA3 * B[(jToCol+11) + (iToRow)];
+ //tempC12 += tempA3 * B[(jToCol+12) + (iToRow)];
+ //tempC13 += tempA3 * B[(jToCol+13) + (iToRow)];
+ //tempC14 += tempA3 * B[(jToCol+14) + (iToRow)];
+ //tempC15 += tempA3 * B[(jToCol+15) + (iToRow)];
+
+ //iToRow += 32;
+ //tempC0 += tempA4 * B[(jToCol ) + (iToRow)];
+ //tempC1 += tempA4 * B[(jToCol+1 ) + (iToRow)];
+ //tempC2 += tempA4 * B[(jToCol+2 ) + (iToRow)];
+ //tempC3 += tempA4 * B[(jToCol+3 ) + (iToRow)];
+ //tempC4 += tempA4 * B[(jToCol+4 ) + (iToRow)];
+ //tempC5 += tempA4 * B[(jToCol+5 ) + (iToRow)];
+ //tempC6 += tempA4 * B[(jToCol+6 ) + (iToRow)];
+ //tempC7 += tempA4 * B[(jToCol+7 ) + (iToRow)];
+ //
+ //iToRow += 32;
+ //tempC0 += tempA5 * B[(jToCol ) + (iToRow)];
+ //tempC1 += tempA5 * B[(jToCol+1 ) + (iToRow)];
+ //tempC2 += tempA5 * B[(jToCol+2 ) + (iToRow)];
+ //tempC3 += tempA5 * B[(jToCol+3 ) + (iToRow)];
+ //tempC4 += tempA5 * B[(jToCol+4 ) + (iToRow)];
+ //tempC5 += tempA5 * B[(jToCol+5 ) + (iToRow)];
+ //tempC6 += tempA5 * B[(jToCol+6 ) + (iToRow)];
+ //tempC7 += tempA5 * B[(jToCol+7 ) + (iToRow)];
+ //
+ //iToRow += 32;
+ //tempC0 += tempA6 * B[(jToCol ) + (iToRow)];
+ //tempC1 += tempA6 * B[(jToCol+1 ) + (iToRow)];
+ //tempC2 += tempA6 * B[(jToCol+2 ) + (iToRow)];
+ //tempC3 += tempA6 * B[(jToCol+3 ) + (iToRow)];
+ //tempC4 += tempA6 * B[(jToCol+4 ) + (iToRow)];
+ //tempC5 += tempA6 * B[(jToCol+5 ) + (iToRow)];
+ //tempC6 += tempA6 * B[(jToCol+6 ) + (iToRow)];
+ //tempC7 += tempA6 * B[(jToCol+7 ) + (iToRow)];
+ //
+ //iToRow += 32;
+ //tempC0 += tempA7 * B[(jToCol ) + (iToRow)];
+ //tempC1 += tempA7 * B[(jToCol+1 ) + (iToRow)];
+ //tempC2 += tempA7 * B[(jToCol+2 ) + (iToRow)];
+ //tempC3 += tempA7 * B[(jToCol+3 ) + (iToRow)];
+ //tempC4 += tempA7 * B[(jToCol+4 ) + (iToRow)];
+ //tempC5 += tempA7 * B[(jToCol+5 ) + (iToRow)];
+ //tempC6 += tempA7 * B[(jToCol+6 ) + (iToRow)];
+ //tempC7 += tempA7 * B[(jToCol+7 ) + (iToRow)];
+
+ } // end of the i loop
+ C[j ] = tempC0; // store the eight finished sums; C is overwritten, not added to
+ C[j + 1 ] = tempC1;
+ C[j + 2 ] = tempC2;
+ C[j + 3 ] = tempC3;
+ C[j + 4 ] = tempC4;
+ C[j + 5 ] = tempC5;
+ C[j + 6 ] = tempC6;
+ C[j + 7 ] = tempC7;
+ //C[j + 8 ] = tempC8 ;
+ //C[j + 9 ] = tempC9 ;
+ //C[j + 10] = tempC10;
+ //C[j + 11] = tempC11;
+ //C[j + 12] = tempC12;
+ //C[j + 13] = tempC13;
+ //C[j + 14] = tempC14;
+ //C[j + 15] = tempC15;
+ }
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-data_t ffmul(data_t a, data_t b) {
- data_t result = 0;
-
- for (int i=0; i < b; i++) {
- result += a;
- }
-
- return result;
-}
-
-
-//void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-//{
-//
-// // ***************************** //
-// // **** ADD YOUR CODE HERE ***** //
-// // ***************************** //
-// //
-// // feel free to make a separate function for MI and MSI versions.
-//
-// static __thread int i, j, k;
-// static __thread int jlda, ilda;
-// static __thread data_t tempA1, tempA2, tempA3, tempA4, tempA5, tempA6, tempA7, tempA8;
-// static __thread int start, end;
-//
-// start = coreid*(lda>>1);
-// end = (coreid+1)*(lda>>1);
-//
-// for (j=start; j < end; j+=1) {
-// jlda = j * lda;
-// for ( i=0; i < lda; i+=1 ) {
-// ilda = i*lda;
-// tempA1 = A[i + jlda];
-// //tempA2 = A[i+1 + jlda];
-// //tempA3 = A[i+2 + jlda];
-// //tempA4 = A[i+3 + jlda];
-// //tempA5 = A[i+4 + jlda];
-// //tempA6 = A[i+5 + jlda];
-// //tempA7 = A[i+6 + jlda];
-// //tempA8 = A[i+7 + jlda];
-// //tempC1 = C[i + j*lda];
-// //tempC2 = C[i+1 + j*lda];
-// for(k=0; k < lda; k+=1) {
-// //C[k + jlda] += tempA1 * B[k + i*lda] + tempA2 * B[k + (i+1)*lda] + tempA3 * B[k + (i+2)*lda] + tempA4 * B[k + (i+3)*lda] +
-// // tempA5 * B[k + (i+4)*lda] + tempA6 * B[k + (i+5)*lda] + tempA7 * B[k + (i+6)*lda] + tempA8 * B[k + (i+7)*lda];
-//
-// C[k + jlda] += tempA1* B[k + i*lda];// + ffmul(tempA2,B[k + (i+1)*lda]) + tempA3 * B[k + (i+2)*lda] + tempA4 * B[k + (i+3)*lda] +
-// // tempA5 * B[k + (i+4)*lda] + tempA6 * B[k + (i+5)*lda] + tempA7 * B[k + (i+6)*lda] + tempA8 * B[k + (i+7)*lda];
-// //
-// //C[k+1 + jlda] += tempA1 * B[k+1 + i*lda] + tempA2 * B[k+1 + (i+1)*lda] + tempA3 * B[k+1 + (i+2)*lda] + tempA4 * B[k+1 + (i+3)*lda] +
-// // tempA5 * B[k+1 + (i+4)*lda] + tempA6 * B[k+1 + (i+5)*lda] + tempA7 * B[k+1 + (i+6)*lda] + tempA8 * B[k+1 + (i+7)*lda];
-// //
-// //C[k+2 + jlda] += tempA1 * B[k+2 + i*lda] + tempA2 * B[k+2 + (i+1)*lda] + tempA3 * B[k+2 + (i+2)*lda] + tempA4 * B[k+2 + (i+3)*lda] +
-// // tempA5 * B[k+2 + (i+4)*lda] + tempA6 * B[k+2 + (i+5)*lda] + tempA7 * B[k+2 + (i+6)*lda] + tempA8 * B[k+2 + (i+7)*lda];
-// //
-// //C[k+3 + jlda] += tempA1 * B[k+3 + i*lda] + tempA2 * B[k+3 + (i+1)*lda] + tempA3 * B[k+3 + (i+2)*lda] + tempA4 * B[k+3 + (i+3)*lda] +
-// // tempA5 * B[k+3 + (i+4)*lda] + tempA6 * B[k+3 + (i+5)*lda] + tempA7 * B[k+3 + (i+6)*lda] + tempA8 * B[k+3 + (i+7)*lda];
-// //
-// //C[k+4 + jlda] += tempA1 * B[k+4 + i*lda] + tempA2 * B[k+4 + (i+1)*lda] + tempA3 * B[k+4 + (i+2)*lda] + tempA4 * B[k+4 + (i+3)*lda] +
-// // tempA5 * B[k+4 + (i+4)*lda] + tempA6 * B[k+4 + (i+5)*lda] + tempA7 * B[k+4 + (i+6)*lda] + tempA8 * B[k+4 + (i+7)*lda];
-// //
-// //C[k+5 + jlda] += tempA1 * B[k+5 + i*lda] + tempA2 * B[k+5 + (i+1)*lda] + tempA3 * B[k+5 + (i+2)*lda] + tempA4 * B[k+5 + (i+3)*lda] +
-// // tempA5 * B[k+5 + (i+4)*lda] + tempA6 * B[k+5 + (i+5)*lda] + tempA7 * B[k+5 + (i+6)*lda] + tempA8 * B[k+5 + (i+7)*lda];
-// //
-// //C[k+6 + jlda] += tempA1 * B[k+6 + i*lda] + tempA2 * B[k+6 + (i+1)*lda] + tempA3 * B[k+6 + (i+2)*lda] + tempA4 * B[k+6 + (i+3)*lda] +
-// // tempA5 * B[k+6 + (i+4)*lda] + tempA6 * B[k+6 + (i+5)*lda] + tempA7 * B[k+6 + (i+6)*lda] + tempA8 * B[k+6 + (i+7)*lda];
-// //
-// //C[k+7 + jlda] += tempA1 * B[k+7 + i*lda] + tempA2 * B[k+7 + (i+1)*lda] + tempA3 * B[k+7 + (i+2)*lda] + tempA4 * B[k+7 + (i+3)*lda] +
-// // tempA5 * B[k+7 + (i+4)*lda] + tempA6 * B[k+7 + (i+5)*lda] + tempA7 * B[k+7 + (i+6)*lda] + tempA8 * B[k+7 + (i+7)*lda];
-//
-//
-// }
-// }
-// }
-//}
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
- static __thread int i, j, k;
- static __thread data_t tempA0, tempA1, tempA2, tempA3, tempA4, tempA5, tempA6, tempA7;
- static __thread data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7; //tempC8, tempC9, tempC10, tempC11, tempC12, tempC13, tempC14, tempC15;
-
- static __thread int start, end, jStride, jToRow, jToCol, iToRow;
-
- start = coreid << 9;
- end = (coreid+1) << 9;
- jStride = 8;
-
- for (j=start; j < end; j+=jStride) {
- jToRow = (j>>5)<<5;
- jToCol = j%32;
- tempC0 = 0;
- tempC1 = 0;
- tempC2 = 0;
- tempC3 = 0;
- tempC4 = 0;
- tempC5 = 0;
- tempC6 = 0;
- tempC7 = 0;
- //tempC8 = 0;
- //tempC9 = 0;
- //tempC10 = 0;
- //tempC11 = 0;
- //tempC12 = 0;
- //tempC13 = 0;
- //tempC14 = 0;
- //tempC15 = 0;
-
- for ( i=0; i < lda; i+=2 ) {
- iToRow = i << 5;
-
- tempA0 = A[i + jToRow];
- tempA1 = A[i+1 + jToRow];
- //tempA2 = A[i+2 + jToRow];
- //tempA3 = A[i+3 + jToRow];
- //tempA4 = A[i+4 + jToRow];
- //tempA5 = A[i+5 + jToRow];
- //tempA6 = A[i+6 + jToRow];
- //tempA7 = A[i+7 + jToRow];
-
- tempC0 += tempA0 * B[(jToCol ) + (iToRow)];
- tempC1 += tempA0 * B[(jToCol+1 ) + (iToRow)];
- tempC2 += tempA0 * B[(jToCol+2 ) + (iToRow)];
- tempC3 += tempA0 * B[(jToCol+3 ) + (iToRow)];
- tempC4 += tempA0 * B[(jToCol+4 ) + (iToRow)];
- tempC5 += tempA0 * B[(jToCol+5 ) + (iToRow)];
- tempC6 += tempA0 * B[(jToCol+6 ) + (iToRow)];
- tempC7 += tempA0 * B[(jToCol+7 ) + (iToRow)];
- //tempC8 += tempA0 * B[(jToCol+8 ) + (iToRow)];
- //tempC9 += tempA0 * B[(jToCol+9 ) + (iToRow)];
- //tempC10 += tempA0 * B[(jToCol+10) + (iToRow)];
- //tempC11 += tempA0 * B[(jToCol+11) + (iToRow)];
- //tempC12 += tempA0 * B[(jToCol+12) + (iToRow)];
- //tempC13 += tempA0 * B[(jToCol+13) + (iToRow)];
- //tempC14 += tempA0 * B[(jToCol+14) + (iToRow)];
- //tempC15 += tempA0 * B[(jToCol+15) + (iToRow)];
-
- iToRow += 32;
- tempC0 += tempA1 * B[(jToCol ) + (iToRow)];
- tempC1 += tempA1 * B[(jToCol+1 ) + (iToRow)];
- tempC2 += tempA1 * B[(jToCol+2 ) + (iToRow)];
- tempC3 += tempA1 * B[(jToCol+3 ) + (iToRow)];
- tempC4 += tempA1 * B[(jToCol+4 ) + (iToRow)];
- tempC5 += tempA1 * B[(jToCol+5 ) + (iToRow)];
- tempC6 += tempA1 * B[(jToCol+6 ) + (iToRow)];
- tempC7 += tempA1 * B[(jToCol+7 ) + (iToRow)];
- //tempC8 += tempA1 * B[(jToCol+8 ) + (iToRow+32)];
- //tempC9 += tempA1 * B[(jToCol+9 ) + (iToRow+32)];
- //tempC10 += tempA1 * B[(jToCol+10) + (iToRow+32)];
- //tempC11 += tempA1 * B[(jToCol+11) + (iToRow+32)];
- //tempC12 += tempA1 * B[(jToCol+12) + (iToRow+32)];
- //tempC13 += tempA1 * B[(jToCol+13) + (iToRow+32)];
- //tempC14 += tempA1 * B[(jToCol+14) + (iToRow+32)];
- //tempC15 += tempA1 * B[(jToCol+15) + (iToRow+32)];
-
- //iToRow += 32;
- //tempC0 += tempA2 * B[(jToCol ) + (iToRow)];
- //tempC1 += tempA2 * B[(jToCol+1 ) + (iToRow)];
- //tempC2 += tempA2 * B[(jToCol+2 ) + (iToRow)];
- //tempC3 += tempA2 * B[(jToCol+3 ) + (iToRow)];
- //tempC4 += tempA2 * B[(jToCol+4 ) + (iToRow)];
- //tempC5 += tempA2 * B[(jToCol+5 ) + (iToRow)];
- //tempC6 += tempA2 * B[(jToCol+6 ) + (iToRow)];
- //tempC7 += tempA2 * B[(jToCol+7 ) + (iToRow)];
- //tempC8 += tempA2 * B[(jToCol+8 ) + (iToRow)];
- //tempC9 += tempA2 * B[(jToCol+9 ) + (iToRow)];
- //tempC10 += tempA2 * B[(jToCol+10) + (iToRow)];
- //tempC11 += tempA2 * B[(jToCol+11) + (iToRow)];
- //tempC12 += tempA2 * B[(jToCol+12) + (iToRow)];
- //tempC13 += tempA2 * B[(jToCol+13) + (iToRow)];
- //tempC14 += tempA2 * B[(jToCol+14) + (iToRow)];
- //tempC15 += tempA2 * B[(jToCol+15) + (iToRow)];
-
- //iToRow += 32;
- //tempC0 += tempA3 * B[(jToCol ) + (iToRow)];
- //tempC1 += tempA3 * B[(jToCol+1 ) + (iToRow)];
- //tempC2 += tempA3 * B[(jToCol+2 ) + (iToRow)];
- //tempC3 += tempA3 * B[(jToCol+3 ) + (iToRow)];
- //tempC4 += tempA3 * B[(jToCol+4 ) + (iToRow)];
- //tempC5 += tempA3 * B[(jToCol+5 ) + (iToRow)];
- //tempC6 += tempA3 * B[(jToCol+6 ) + (iToRow)];
- //tempC7 += tempA3 * B[(jToCol+7 ) + (iToRow)];
- //tempC8 += tempA3 * B[(jToCol+8 ) + (iToRow)];
- //tempC9 += tempA3 * B[(jToCol+9 ) + (iToRow)];
- //tempC10 += tempA3 * B[(jToCol+10) + (iToRow)];
- //tempC11 += tempA3 * B[(jToCol+11) + (iToRow)];
- //tempC12 += tempA3 * B[(jToCol+12) + (iToRow)];
- //tempC13 += tempA3 * B[(jToCol+13) + (iToRow)];
- //tempC14 += tempA3 * B[(jToCol+14) + (iToRow)];
- //tempC15 += tempA3 * B[(jToCol+15) + (iToRow)];
-
- //iToRow += 32;
- //tempC0 += tempA4 * B[(jToCol ) + (iToRow)];
- //tempC1 += tempA4 * B[(jToCol+1 ) + (iToRow)];
- //tempC2 += tempA4 * B[(jToCol+2 ) + (iToRow)];
- //tempC3 += tempA4 * B[(jToCol+3 ) + (iToRow)];
- //tempC4 += tempA4 * B[(jToCol+4 ) + (iToRow)];
- //tempC5 += tempA4 * B[(jToCol+5 ) + (iToRow)];
- //tempC6 += tempA4 * B[(jToCol+6 ) + (iToRow)];
- //tempC7 += tempA4 * B[(jToCol+7 ) + (iToRow)];
- //
- //iToRow += 32;
- //tempC0 += tempA5 * B[(jToCol ) + (iToRow)];
- //tempC1 += tempA5 * B[(jToCol+1 ) + (iToRow)];
- //tempC2 += tempA5 * B[(jToCol+2 ) + (iToRow)];
- //tempC3 += tempA5 * B[(jToCol+3 ) + (iToRow)];
- //tempC4 += tempA5 * B[(jToCol+4 ) + (iToRow)];
- //tempC5 += tempA5 * B[(jToCol+5 ) + (iToRow)];
- //tempC6 += tempA5 * B[(jToCol+6 ) + (iToRow)];
- //tempC7 += tempA5 * B[(jToCol+7 ) + (iToRow)];
- //
- //iToRow += 32;
- //tempC0 += tempA6 * B[(jToCol ) + (iToRow)];
- //tempC1 += tempA6 * B[(jToCol+1 ) + (iToRow)];
- //tempC2 += tempA6 * B[(jToCol+2 ) + (iToRow)];
- //tempC3 += tempA6 * B[(jToCol+3 ) + (iToRow)];
- //tempC4 += tempA6 * B[(jToCol+4 ) + (iToRow)];
- //tempC5 += tempA6 * B[(jToCol+5 ) + (iToRow)];
- //tempC6 += tempA6 * B[(jToCol+6 ) + (iToRow)];
- //tempC7 += tempA6 * B[(jToCol+7 ) + (iToRow)];
- //
- //iToRow += 32;
- //tempC0 += tempA7 * B[(jToCol ) + (iToRow)];
- //tempC1 += tempA7 * B[(jToCol+1 ) + (iToRow)];
- //tempC2 += tempA7 * B[(jToCol+2 ) + (iToRow)];
- //tempC3 += tempA7 * B[(jToCol+3 ) + (iToRow)];
- //tempC4 += tempA7 * B[(jToCol+4 ) + (iToRow)];
- //tempC5 += tempA7 * B[(jToCol+5 ) + (iToRow)];
- //tempC6 += tempA7 * B[(jToCol+6 ) + (iToRow)];
- //tempC7 += tempA7 * B[(jToCol+7 ) + (iToRow)];
-
- }
- C[j ] = tempC0;
- C[j + 1 ] = tempC1;
- C[j + 2 ] = tempC2;
- C[j + 3 ] = tempC3;
- C[j + 4 ] = tempC4;
- C[j + 5 ] = tempC5;
- C[j + 6 ] = tempC6;
- C[j + 7 ] = tempC7;
- //C[j + 8 ] = tempC8 ;
- //C[j + 9 ] = tempC9 ;
- //C[j + 10] = tempC10;
- //C[j + 11] = tempC11;
- //C[j + 12] = tempC12;
- //C[j + 13] = tempC13;
- //C[j + 14] = tempC14;
- //C[j + 15] = tempC15;
- }
-}
-
-
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- //// Execute the provided, naive matmul
- //barrier(nc);
- //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
- //
- //// verify
- //verifyMT(ARRAY_SIZE, results_data, verify_data);
- //
- //// clear results from the first trial
- //size_t i;
- //if (coreid == 0)
- // for (i=0; i < ARRAY_SIZE; i++)
- // results_data[i] = 0;
- //barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-data_t ffmul(data_t a, data_t b) {
- data_t result = 0;
-
- for (int i=0; i < b; i++) {
- result += a;
- }
-
- return result;
-}
-
-
-//void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-//{
-//
-// // ***************************** //
-// // **** ADD YOUR CODE HERE ***** //
-// // ***************************** //
-// //
-// // feel free to make a separate function for MI and MSI versions.
-//
-// static __thread int i, j, k;
-// static __thread int jlda, ilda;
-// static __thread data_t tempA1, tempA2, tempA3, tempA4, tempA5, tempA6, tempA7, tempA8;
-// static __thread int start, end;
-//
-// start = coreid*(lda>>1);
-// end = (coreid+1)*(lda>>1);
-//
-// for (j=start; j < end; j+=1) {
-// jlda = j * lda;
-// for ( i=0; i < lda; i+=1 ) {
-// ilda = i*lda;
-// tempA1 = A[i + jlda];
-// //tempA2 = A[i+1 + jlda];
-// //tempA3 = A[i+2 + jlda];
-// //tempA4 = A[i+3 + jlda];
-// //tempA5 = A[i+4 + jlda];
-// //tempA6 = A[i+5 + jlda];
-// //tempA7 = A[i+6 + jlda];
-// //tempA8 = A[i+7 + jlda];
-// //tempC1 = C[i + j*lda];
-// //tempC2 = C[i+1 + j*lda];
-// for(k=0; k < lda; k+=1) {
-// //C[k + jlda] += tempA1 * B[k + i*lda] + tempA2 * B[k + (i+1)*lda] + tempA3 * B[k + (i+2)*lda] + tempA4 * B[k + (i+3)*lda] +
-// // tempA5 * B[k + (i+4)*lda] + tempA6 * B[k + (i+5)*lda] + tempA7 * B[k + (i+6)*lda] + tempA8 * B[k + (i+7)*lda];
-//
-// C[k + jlda] += tempA1* B[k + i*lda];// + ffmul(tempA2,B[k + (i+1)*lda]) + tempA3 * B[k + (i+2)*lda] + tempA4 * B[k + (i+3)*lda] +
-// // tempA5 * B[k + (i+4)*lda] + tempA6 * B[k + (i+5)*lda] + tempA7 * B[k + (i+6)*lda] + tempA8 * B[k + (i+7)*lda];
-// //
-// //C[k+1 + jlda] += tempA1 * B[k+1 + i*lda] + tempA2 * B[k+1 + (i+1)*lda] + tempA3 * B[k+1 + (i+2)*lda] + tempA4 * B[k+1 + (i+3)*lda] +
-// // tempA5 * B[k+1 + (i+4)*lda] + tempA6 * B[k+1 + (i+5)*lda] + tempA7 * B[k+1 + (i+6)*lda] + tempA8 * B[k+1 + (i+7)*lda];
-// //
-// //C[k+2 + jlda] += tempA1 * B[k+2 + i*lda] + tempA2 * B[k+2 + (i+1)*lda] + tempA3 * B[k+2 + (i+2)*lda] + tempA4 * B[k+2 + (i+3)*lda] +
-// // tempA5 * B[k+2 + (i+4)*lda] + tempA6 * B[k+2 + (i+5)*lda] + tempA7 * B[k+2 + (i+6)*lda] + tempA8 * B[k+2 + (i+7)*lda];
-// //
-// //C[k+3 + jlda] += tempA1 * B[k+3 + i*lda] + tempA2 * B[k+3 + (i+1)*lda] + tempA3 * B[k+3 + (i+2)*lda] + tempA4 * B[k+3 + (i+3)*lda] +
-// // tempA5 * B[k+3 + (i+4)*lda] + tempA6 * B[k+3 + (i+5)*lda] + tempA7 * B[k+3 + (i+6)*lda] + tempA8 * B[k+3 + (i+7)*lda];
-// //
-// //C[k+4 + jlda] += tempA1 * B[k+4 + i*lda] + tempA2 * B[k+4 + (i+1)*lda] + tempA3 * B[k+4 + (i+2)*lda] + tempA4 * B[k+4 + (i+3)*lda] +
-// // tempA5 * B[k+4 + (i+4)*lda] + tempA6 * B[k+4 + (i+5)*lda] + tempA7 * B[k+4 + (i+6)*lda] + tempA8 * B[k+4 + (i+7)*lda];
-// //
-// //C[k+5 + jlda] += tempA1 * B[k+5 + i*lda] + tempA2 * B[k+5 + (i+1)*lda] + tempA3 * B[k+5 + (i+2)*lda] + tempA4 * B[k+5 + (i+3)*lda] +
-// // tempA5 * B[k+5 + (i+4)*lda] + tempA6 * B[k+5 + (i+5)*lda] + tempA7 * B[k+5 + (i+6)*lda] + tempA8 * B[k+5 + (i+7)*lda];
-// //
-// //C[k+6 + jlda] += tempA1 * B[k+6 + i*lda] + tempA2 * B[k+6 + (i+1)*lda] + tempA3 * B[k+6 + (i+2)*lda] + tempA4 * B[k+6 + (i+3)*lda] +
-// // tempA5 * B[k+6 + (i+4)*lda] + tempA6 * B[k+6 + (i+5)*lda] + tempA7 * B[k+6 + (i+6)*lda] + tempA8 * B[k+6 + (i+7)*lda];
-// //
-// //C[k+7 + jlda] += tempA1 * B[k+7 + i*lda] + tempA2 * B[k+7 + (i+1)*lda] + tempA3 * B[k+7 + (i+2)*lda] + tempA4 * B[k+7 + (i+3)*lda] +
-// // tempA5 * B[k+7 + (i+4)*lda] + tempA6 * B[k+7 + (i+5)*lda] + tempA7 * B[k+7 + (i+6)*lda] + tempA8 * B[k+7 + (i+7)*lda];
-//
-//
-// }
-// }
-// }
-//}
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
- static __thread int i, j, k;
- static __thread data_t tempA0, tempA1, tempA2, tempA3, tempA4, tempA5, tempA6, tempA7;
- static __thread data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7; //tempC8, tempC9, tempC10, tempC11, tempC12, tempC13, tempC14, tempC15;
-
- static __thread int start, end, jStride, jToRow, jToCol, iToRow;
-
- start = coreid << 9;
- end = (coreid+1) << 9;
- jStride = 8;
-
- for (j=start; j < end; j+=jStride) {
- jToRow = (j>>5)<<5;
- jToCol = j%32;
- tempC0 = 0;
- tempC1 = 0;
- tempC2 = 0;
- tempC3 = 0;
- tempC4 = 0;
- tempC5 = 0;
- tempC6 = 0;
- tempC7 = 0;
- //tempC8 = 0;
- //tempC9 = 0;
- //tempC10 = 0;
- //tempC11 = 0;
- //tempC12 = 0;
- //tempC13 = 0;
- //tempC14 = 0;
- //tempC15 = 0;
-
- for ( i=0; i < lda; i+=2 ) {
- iToRow = i << 5;
-
- tempA0 = A[i + jToRow];
- tempA1 = A[i+1 + jToRow];
- //tempA2 = A[i+2 + jToRow];
- //tempA3 = A[i+3 + jToRow];
- //tempA4 = A[i+4 + jToRow];
- //tempA5 = A[i+5 + jToRow];
- //tempA6 = A[i+6 + jToRow];
- //tempA7 = A[i+7 + jToRow];
-
- tempC0 += tempA0 * B[(jToCol ) + (iToRow)];
- tempC1 += tempA0 * B[(jToCol+1 ) + (iToRow)];
- tempC2 += tempA0 * B[(jToCol+2 ) + (iToRow)];
- tempC3 += tempA0 * B[(jToCol+3 ) + (iToRow)];
- tempC4 += tempA0 * B[(jToCol+4 ) + (iToRow)];
- tempC5 += tempA0 * B[(jToCol+5 ) + (iToRow)];
- tempC6 += tempA0 * B[(jToCol+6 ) + (iToRow)];
- tempC7 += tempA0 * B[(jToCol+7 ) + (iToRow)];
- //tempC8 += tempA0 * B[(jToCol+8 ) + (iToRow)];
- //tempC9 += tempA0 * B[(jToCol+9 ) + (iToRow)];
- //tempC10 += tempA0 * B[(jToCol+10) + (iToRow)];
- //tempC11 += tempA0 * B[(jToCol+11) + (iToRow)];
- //tempC12 += tempA0 * B[(jToCol+12) + (iToRow)];
- //tempC13 += tempA0 * B[(jToCol+13) + (iToRow)];
- //tempC14 += tempA0 * B[(jToCol+14) + (iToRow)];
- //tempC15 += tempA0 * B[(jToCol+15) + (iToRow)];
-
- iToRow += 32;
- tempC0 += tempA1 * B[(jToCol ) + (iToRow)];
- tempC1 += tempA1 * B[(jToCol+1 ) + (iToRow)];
- tempC2 += tempA1 * B[(jToCol+2 ) + (iToRow)];
- tempC3 += tempA1 * B[(jToCol+3 ) + (iToRow)];
- tempC4 += tempA1 * B[(jToCol+4 ) + (iToRow)];
- tempC5 += tempA1 * B[(jToCol+5 ) + (iToRow)];
- tempC6 += tempA1 * B[(jToCol+6 ) + (iToRow)];
- tempC7 += tempA1 * B[(jToCol+7 ) + (iToRow)];
- //tempC8 += tempA1 * B[(jToCol+8 ) + (iToRow+32)];
- //tempC9 += tempA1 * B[(jToCol+9 ) + (iToRow+32)];
- //tempC10 += tempA1 * B[(jToCol+10) + (iToRow+32)];
- //tempC11 += tempA1 * B[(jToCol+11) + (iToRow+32)];
- //tempC12 += tempA1 * B[(jToCol+12) + (iToRow+32)];
- //tempC13 += tempA1 * B[(jToCol+13) + (iToRow+32)];
- //tempC14 += tempA1 * B[(jToCol+14) + (iToRow+32)];
- //tempC15 += tempA1 * B[(jToCol+15) + (iToRow+32)];
-
- //iToRow += 32;
- //tempC0 += tempA2 * B[(jToCol ) + (iToRow)];
- //tempC1 += tempA2 * B[(jToCol+1 ) + (iToRow)];
- //tempC2 += tempA2 * B[(jToCol+2 ) + (iToRow)];
- //tempC3 += tempA2 * B[(jToCol+3 ) + (iToRow)];
- //tempC4 += tempA2 * B[(jToCol+4 ) + (iToRow)];
- //tempC5 += tempA2 * B[(jToCol+5 ) + (iToRow)];
- //tempC6 += tempA2 * B[(jToCol+6 ) + (iToRow)];
- //tempC7 += tempA2 * B[(jToCol+7 ) + (iToRow)];
- //tempC8 += tempA2 * B[(jToCol+8 ) + (iToRow)];
- //tempC9 += tempA2 * B[(jToCol+9 ) + (iToRow)];
- //tempC10 += tempA2 * B[(jToCol+10) + (iToRow)];
- //tempC11 += tempA2 * B[(jToCol+11) + (iToRow)];
- //tempC12 += tempA2 * B[(jToCol+12) + (iToRow)];
- //tempC13 += tempA2 * B[(jToCol+13) + (iToRow)];
- //tempC14 += tempA2 * B[(jToCol+14) + (iToRow)];
- //tempC15 += tempA2 * B[(jToCol+15) + (iToRow)];
-
- //iToRow += 32;
- //tempC0 += tempA3 * B[(jToCol ) + (iToRow)];
- //tempC1 += tempA3 * B[(jToCol+1 ) + (iToRow)];
- //tempC2 += tempA3 * B[(jToCol+2 ) + (iToRow)];
- //tempC3 += tempA3 * B[(jToCol+3 ) + (iToRow)];
- //tempC4 += tempA3 * B[(jToCol+4 ) + (iToRow)];
- //tempC5 += tempA3 * B[(jToCol+5 ) + (iToRow)];
- //tempC6 += tempA3 * B[(jToCol+6 ) + (iToRow)];
- //tempC7 += tempA3 * B[(jToCol+7 ) + (iToRow)];
- //tempC8 += tempA3 * B[(jToCol+8 ) + (iToRow)];
- //tempC9 += tempA3 * B[(jToCol+9 ) + (iToRow)];
- //tempC10 += tempA3 * B[(jToCol+10) + (iToRow)];
- //tempC11 += tempA3 * B[(jToCol+11) + (iToRow)];
- //tempC12 += tempA3 * B[(jToCol+12) + (iToRow)];
- //tempC13 += tempA3 * B[(jToCol+13) + (iToRow)];
- //tempC14 += tempA3 * B[(jToCol+14) + (iToRow)];
- //tempC15 += tempA3 * B[(jToCol+15) + (iToRow)];
-
- //iToRow += 32;
- //tempC0 += tempA4 * B[(jToCol ) + (iToRow)];
- //tempC1 += tempA4 * B[(jToCol+1 ) + (iToRow)];
- //tempC2 += tempA4 * B[(jToCol+2 ) + (iToRow)];
- //tempC3 += tempA4 * B[(jToCol+3 ) + (iToRow)];
- //tempC4 += tempA4 * B[(jToCol+4 ) + (iToRow)];
- //tempC5 += tempA4 * B[(jToCol+5 ) + (iToRow)];
- //tempC6 += tempA4 * B[(jToCol+6 ) + (iToRow)];
- //tempC7 += tempA4 * B[(jToCol+7 ) + (iToRow)];
- //
- //iToRow += 32;
- //tempC0 += tempA5 * B[(jToCol ) + (iToRow)];
- //tempC1 += tempA5 * B[(jToCol+1 ) + (iToRow)];
- //tempC2 += tempA5 * B[(jToCol+2 ) + (iToRow)];
- //tempC3 += tempA5 * B[(jToCol+3 ) + (iToRow)];
- //tempC4 += tempA5 * B[(jToCol+4 ) + (iToRow)];
- //tempC5 += tempA5 * B[(jToCol+5 ) + (iToRow)];
- //tempC6 += tempA5 * B[(jToCol+6 ) + (iToRow)];
- //tempC7 += tempA5 * B[(jToCol+7 ) + (iToRow)];
- //
- //iToRow += 32;
- //tempC0 += tempA6 * B[(jToCol ) + (iToRow)];
- //tempC1 += tempA6 * B[(jToCol+1 ) + (iToRow)];
- //tempC2 += tempA6 * B[(jToCol+2 ) + (iToRow)];
- //tempC3 += tempA6 * B[(jToCol+3 ) + (iToRow)];
- //tempC4 += tempA6 * B[(jToCol+4 ) + (iToRow)];
- //tempC5 += tempA6 * B[(jToCol+5 ) + (iToRow)];
- //tempC6 += tempA6 * B[(jToCol+6 ) + (iToRow)];
- //tempC7 += tempA6 * B[(jToCol+7 ) + (iToRow)];
- //
- //iToRow += 32;
- //tempC0 += tempA7 * B[(jToCol ) + (iToRow)];
- //tempC1 += tempA7 * B[(jToCol+1 ) + (iToRow)];
- //tempC2 += tempA7 * B[(jToCol+2 ) + (iToRow)];
- //tempC3 += tempA7 * B[(jToCol+3 ) + (iToRow)];
- //tempC4 += tempA7 * B[(jToCol+4 ) + (iToRow)];
- //tempC5 += tempA7 * B[(jToCol+5 ) + (iToRow)];
- //tempC6 += tempA7 * B[(jToCol+6 ) + (iToRow)];
- //tempC7 += tempA7 * B[(jToCol+7 ) + (iToRow)];
-
- }
- C[j ] = tempC0;
- C[j + 1 ] = tempC1;
- C[j + 2 ] = tempC2;
- C[j + 3 ] = tempC3;
- C[j + 4 ] = tempC4;
- C[j + 5 ] = tempC5;
- C[j + 6 ] = tempC6;
- C[j + 7 ] = tempC7;
- //C[j + 8 ] = tempC8 ;
- //C[j + 9 ] = tempC9 ;
- //C[j + 10] = tempC10;
- //C[j + 11] = tempC11;
- //C[j + 12] = tempC12;
- //C[j + 13] = tempC13;
- //C[j + 14] = tempC14;
- //C[j + 15] = tempC15;
- }
-}
-
-
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- //// Execute the provided, naive matmul
- //barrier(nc);
- //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
- //
- //// verify
- //verifyMT(ARRAY_SIZE, results_data, verify_data);
- //
- //// clear results from the first trial
- //size_t i;
- //if (coreid == 0)
- // for (i=0; i < ARRAY_SIZE; i++)
- // results_data[i] = 0;
- //barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
-
- size_t i;
- size_t split = n / ncores;
-
- //interleave accesses
- for (i = coreid * split; i < (coreid+1)*split-1 && i < n-1; i+=2) {
- x[i] = x[i] + y[i];
- x[i+1] = x[i+1] + y[i+1];
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc) /* Benchmark driver; per the file comment every thread starts here.
- * cid: id of the calling core, stored in coreid.
- * nc:  number of cores, stored in ncores and passed to every barrier() call.
- * Runs vvadd and then vvadd_opt on the same inputs, checks each result
- * against verify_data with verifyMT, and ends with exit(0).
- * NOTE(review): barrier() is defined elsewhere; presumably it blocks until
- * all nc cores have arrived - confirm. */
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0) // only core 0 fills the shared buffer
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc); // separates the copy above from the first vvadd
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // the trailing barrier is part of the code handed to stats
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc); // separates the reset from the second run
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0); // reached by every core; there is no coreid guard here
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage() # Prints the option summary followed by $usageMsg, then ends the script.
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit(); # no status argument: the exit status is 0, also when called after an option-parsing failure
-}
-
-sub processCommandLine() # Fills the file-level %opts hash from the command line, starting from the defaults below.
-{
-
- $opts{"help"} = 0; # defaults, overwritten by GetOptions when the option is given
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); # ':i' = integer whose value is optional; a parse failure shows the usage text
- $opts{"help"} and usage(); # --help / -? prints the usage text and exits
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
-  # Emit one C array definition of the form
-  #   static data_t <name>[DATA_SIZE] = { ... };
-  # with every value formatted by "%3.2f" and at most 20 values per line.
-  #   $_[0]  name of the C array
-  #   $_[1]  reference to the Perl array holding the values
-  my ( $arrayName, $arrayRef ) = @_;
-
-  my $perLine = 20;
-  my $count   = scalar(@{$arrayRef});
-  my $lastIdx = $count - 1;
-
-  print "static data_t ".$arrayName."[DATA_SIZE] = \n";
-  print "{\n";
-
-  # An empty array still produces a single (blank) body line.
-  if ( $count == 0 ) {
-    print " ";
-    print "\n";
-  }
-
-  # One pass per output line; the final line may hold fewer values.
-  for ( my $first = 0; $first < $count; $first += $perLine ) {
-    my $stop = $first + $perLine - 1;
-    $stop = $lastIdx if $stop > $lastIdx;
-
-    print " ";
-    foreach my $index ( $first .. $stop ) {
-      print sprintf("%3.2f",$arrayRef->[$index]);
-      # every value except the very last one is followed by a separator
-      print ", " unless $index == $lastIdx;
-    }
-    print "\n";
-  }
-
-  print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main() # Generates the data set and prints it as C source: a DATA_SIZE define plus three arrays.
-{
-
- processCommandLine();
- srand($opts{"seed"}); # seed the generator from --seed
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19)); # integer in 0..18
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 ); # expected vvadd result for this index
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third array. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s // turns its argument into a string literal
-#define stringify(s) stringify_1(s) // extra level so the argument is macro-expanded before being stringified
-#define stats(code) do { /* runs `code` once and, on core 0, prints its cycle and instruction cost */ \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); /* start from minus the current counter values ... */ \
- code; \
- _c += rdcycle(), _i += rdinstret(); /* ... so adding the new readings leaves the elapsed counts */ \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); /* "iter" = DIM_SIZE^3; each ratio is printed as integer part plus one decimal digit */ \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
-  // Debug helper: core 0 prints `name` followed by the first n entries of
-  // arr, each converted to long; every other core returns without printing.
-  if (coreid == 0)
-  {
-    int idx = 0;
-
-    printf( " %10s :", name );
-    while ( idx < n )
-    {
-      printf( " %3ld ", (long) arr[idx] );
-      idx++;
-    }
-    printf( "\n" );
-  }
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
-  // Compares the first n entries of `test` against `correct`.
-  // Only core 0 performs the check; every other core returns immediately.
-  // On the first mismatch the index and both values (converted to long) are
-  // printed and the program terminates via exit(-1).
-  if (coreid != 0)
-    return;
-
-  size_t i;
-  for (i = 0; i < n; i++)
-  {
-    if (test[i] != correct[i])
-    {
-      // %d expects an int; passing the size_t index unconverted is undefined
-      // behaviour, so convert it explicitly. The format string is unchanged.
-      printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
-             (int)i, (long)test[i], (int)i, (long)correct[i]);
-      exit(-1);
-    }
-  }
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) // currently a no-op: the reference loop below is commented out, so C is left untouched
-{
-/*
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-*/
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) /* Multi-core matrix multiply that accumulates A*B into C.
- * C is read, added to and written back, so it must hold the desired starting values.
- * All three matrices are indexed as [row*lda + col].
- * Each core handles rows starting at c_start = lda / ncores * coreid, as 2 blocks of 8 rows,
- * i.e. 16 rows per core whatever lda and ncores are.
- * NOTE(review): that row count matches lda == 32 with ncores == 2 - confirm before using other sizes.
- * The kernel updates a tile of 2 rows x 4 columns of C at a time; the inner (k) dimension is
- * walked one quarter of lda per colSplit pass, 4 values per iteration. */
-{
- size_t c_start = lda / ncores * coreid;
- size_t c_row;
- size_t c_col;
- size_t colSplit = 0;
- size_t i;
- size_t useSplit = 0;
- data_t a1;
- data_t a2;
- data_t a3;
- data_t a4;
- data_t a5;
- data_t a6;
- data_t a7;
- data_t a8;
- data_t c1;
- data_t c2;
- data_t c3;
- data_t c4;
- data_t c5;
- data_t c6;
- data_t c7;
- data_t c8;
- size_t block;
- for (block = 0; block < 2; block++) {
- for (colSplit = 0; colSplit < 4; colSplit++) {
- useSplit = (coreid == 0) ? colSplit : (colSplit + 2 ) % 4; // core 0 takes the k-quarters as 0,1,2,3; other cores as 2,3,0,1 (presumably so cores read different parts of B at the same time - confirm)
- for (c_row = c_start + block * 8; c_row < c_start + block * 8 + 8; c_row += 2) {
- for (c_col = 0; c_col < lda; c_col+=4) {
- c1 = C[c_row*lda+c_col]; // load the 2x4 tile of C: odd-numbered cN = row c_row, even-numbered = row c_row+1
- c2 = C[(c_row+1)*lda+c_col];
- c3 = C[c_row*lda+c_col+1];
- c4 = C[(c_row+1)*lda+c_col+1];
- c5 = C[c_row*lda+c_col+2];
- c6 = C[(c_row+1)*lda+c_col+2];
- c7 = C[c_row*lda+c_col+3];
- c8 = C[(c_row+1)*lda+c_col+3];
- for (i = useSplit * lda / 4; i < (useSplit + 1) * lda / 4; i+=4) { // k range of the selected quarter
- a1 = A[c_row*lda+i]; // 2 rows x 4 consecutive k values of A, reused for all four columns below
- a2 = A[(c_row+1)*lda+i];
- a3 = A[c_row*lda+i+1];
- a4 = A[(c_row+1)*lda+i+1];
- a5 = A[c_row*lda+i+2];
- a6 = A[(c_row+1)*lda+i+2];
- a7 = A[c_row*lda+i+3];
- a8 = A[(c_row+1)*lda+i+3];
-
- c1 += a1 * B[i*lda+c_col]; // column c_col
- c2 += a2 * B[i*lda+c_col];
-
- c1 += a3 * B[(i+1)*lda+c_col];
- c2 += a4 * B[(i+1)*lda+c_col];
-
- c1 += a5 * B[(i+2)*lda+c_col];
- c2 += a6 * B[(i+2)*lda+c_col];
-
- c1 += a7 * B[(i+3)*lda+c_col];
- c2 += a8 * B[(i+3)*lda+c_col];
-
- c3 += a1 * B[i*lda+c_col+1]; // column c_col+1
- c4 += a2 * B[i*lda+c_col+1];
-
- c3 += a3 * B[(i+1)*lda+c_col+1];
- c4 += a4 * B[(i+1)*lda+c_col+1];
-
- c3 += a5 * B[(i+2)*lda+c_col+1];
- c4 += a6 * B[(i+2)*lda+c_col+1];
-
- c3 += a7 * B[(i+3)*lda+c_col+1];
- c4 += a8 * B[(i+3)*lda+c_col+1];
-
- c5 += a1 * B[i*lda+c_col+2]; // column c_col+2
- c6 += a2 * B[i*lda+c_col+2];
-
- c5 += a3 * B[(i+1)*lda+c_col+2];
- c6 += a4 * B[(i+1)*lda+c_col+2];
-
- c5 += a5 * B[(i+2)*lda+c_col+2];
- c6 += a6 * B[(i+2)*lda+c_col+2];
-
- c5 += a7 * B[(i+3)*lda+c_col+2];
- c6 += a8 * B[(i+3)*lda+c_col+2];
-
- c7 += a1 * B[i*lda+c_col+3]; // column c_col+3
- c8 += a2 * B[i*lda+c_col+3];
-
- c7 += a3 * B[(i+1)*lda+c_col+3];
- c8 += a4 * B[(i+1)*lda+c_col+3];
-
- c7 += a5 * B[(i+2)*lda+c_col+3];
- c8 += a6 * B[(i+2)*lda+c_col+3];
-
- c7 += a7 * B[(i+3)*lda+c_col+3];
- c8 += a8 * B[(i+3)*lda+c_col+3];
- }
-
- C[c_row*lda+c_col] = c1; // write the partial sums back; later colSplit passes reload and extend them
- C[(c_row+1)*lda+c_col] = c2;
-
- C[c_row*lda+c_col+1] = c3;
- C[(c_row+1)*lda+c_col+1] = c4;
-
- C[c_row*lda+c_col+2] = c5;
- C[(c_row+1)*lda+c_col+2] = c6;
-
- C[c_row*lda+c_col+3] = c7;
- C[(c_row+1)*lda+c_col+3] = c8;
- }
- }
- }
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc) /* Benchmark driver; per the file comment every thread starts here.
- * cid: id of the calling core, stored in coreid.
- * nc:  number of cores, stored in ncores and passed to every barrier() call.
- * Times one matmul run over the DIM_SIZE x DIM_SIZE inputs, checks the
- * result against verify_data with verifyMT, and ends with exit(0).
- * The naive reference run and the clearing step after it are commented out. */
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE]; // static storage starts out as all zeros; matmul adds into this buffer
-
-
- // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // the trailing barrier is part of the timed code
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0); // reached by every core; there is no coreid guard here
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-/*
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-*/
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- size_t c_start = lda / ncores * coreid;
- size_t c_row;
- size_t c_col;
- size_t colSplit = 0;
- size_t i;
- size_t useSplit = 0;
- data_t a1;
- data_t a2;
- data_t a3;
- data_t a4;
- data_t a5;
- data_t a6;
- data_t a7;
- data_t a8;
- data_t c1;
- data_t c2;
- data_t c3;
- data_t c4;
- data_t c5;
- data_t c6;
- data_t c7;
- data_t c8;
- size_t block;
- for (block = 0; block < 2; block++) {
- for (colSplit = 0; colSplit < 4; colSplit++) {
- useSplit = (coreid == 0) ? colSplit : (colSplit + 2 ) % 4;
- for (c_row = c_start + block * 8; c_row < c_start + block * 8 + 8; c_row += 2) {
- for (c_col = 0; c_col < lda; c_col+=4) {
- c1 = C[c_row*lda+c_col];
- c2 = C[(c_row+1)*lda+c_col];
- c3 = C[c_row*lda+c_col+1];
- c4 = C[(c_row+1)*lda+c_col+1];
- c5 = C[c_row*lda+c_col+2];
- c6 = C[(c_row+1)*lda+c_col+2];
- c7 = C[c_row*lda+c_col+3];
- c8 = C[(c_row+1)*lda+c_col+3];
- for (i = useSplit * lda / 4; i < (useSplit + 1) * lda / 4; i+=4) {
- a1 = A[c_row*lda+i];
- a2 = A[(c_row+1)*lda+i];
- a3 = A[c_row*lda+i+1];
- a4 = A[(c_row+1)*lda+i+1];
- a5 = A[c_row*lda+i+2];
- a6 = A[(c_row+1)*lda+i+2];
- a7 = A[c_row*lda+i+3];
- a8 = A[(c_row+1)*lda+i+3];
-
- c1 += a1 * B[i*lda+c_col];
- c2 += a2 * B[i*lda+c_col];
-
- c1 += a3 * B[(i+1)*lda+c_col];
- c2 += a4 * B[(i+1)*lda+c_col];
-
- c1 += a5 * B[(i+2)*lda+c_col];
- c2 += a6 * B[(i+2)*lda+c_col];
-
- c1 += a7 * B[(i+3)*lda+c_col];
- c2 += a8 * B[(i+3)*lda+c_col];
-
- c3 += a1 * B[i*lda+c_col+1];
- c4 += a2 * B[i*lda+c_col+1];
-
- c3 += a3 * B[(i+1)*lda+c_col+1];
- c4 += a4 * B[(i+1)*lda+c_col+1];
-
- c3 += a5 * B[(i+2)*lda+c_col+1];
- c4 += a6 * B[(i+2)*lda+c_col+1];
-
- c3 += a7 * B[(i+3)*lda+c_col+1];
- c4 += a8 * B[(i+3)*lda+c_col+1];
-
- c5 += a1 * B[i*lda+c_col+2];
- c6 += a2 * B[i*lda+c_col+2];
-
- c5 += a3 * B[(i+1)*lda+c_col+2];
- c6 += a4 * B[(i+1)*lda+c_col+2];
-
- c5 += a5 * B[(i+2)*lda+c_col+2];
- c6 += a6 * B[(i+2)*lda+c_col+2];
-
- c5 += a7 * B[(i+3)*lda+c_col+2];
- c6 += a8 * B[(i+3)*lda+c_col+2];
-
- c7 += a1 * B[i*lda+c_col+3];
- c8 += a2 * B[i*lda+c_col+3];
-
- c7 += a3 * B[(i+1)*lda+c_col+3];
- c8 += a4 * B[(i+1)*lda+c_col+3];
-
- c7 += a5 * B[(i+2)*lda+c_col+3];
- c8 += a6 * B[(i+2)*lda+c_col+3];
-
- c7 += a7 * B[(i+3)*lda+c_col+3];
- c8 += a8 * B[(i+3)*lda+c_col+3];
- }
-
- C[c_row*lda+c_col] = c1;
- C[(c_row+1)*lda+c_col] = c2;
-
- C[c_row*lda+c_col+1] = c3;
- C[(c_row+1)*lda+c_col+1] = c4;
-
- C[c_row*lda+c_col+2] = c5;
- C[(c_row+1)*lda+c_col+2] = c6;
-
- C[c_row*lda+c_col+3] = c7;
- C[(c_row+1)*lda+c_col+3] = c8;
- }
- }
- }
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
- size_t start = n * coreid / ncores;
- size_t end = (coreid == ncores - 1) ? n : n * (coreid+1)/ ncores;
- for (i = start; i < end; i++) {
- x[i] = x[i] + y[i];
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "dataset.h"
+// Multi-core matrix multiply: for row-major lda x lda matrices, stores
+// C[j][i] = sum over k of A[j][k] * B[k][i] (flat index C[i + j*lda]).
+//
+// coreid : index of the calling core; selects which band of rows j it computes
+// ncores : number of cores sharing the work
+// lda    : matrix dimension (and row stride) of A, B and C
+// A, B   : read-only inputs; data_t comes from the included dataset.h
+// C      : output; every element this core handles is overwritten, not accumulated
+//
+// Work split: this call covers rows j in
+//   [coreid * (lda / ncores), (coreid + 1) * (lda / ncores)).
+// Results are produced in tiles of 4 rows (j..j+3) by 2 columns (i, i+1).
+//
+// NOTE(review): the loop bounds only work out cleanly when
+//   - lda is even (column i+1 is always read and written),
+//   - lda / ncores is a multiple of 4 (otherwise the last 4-row tile runs
+//     past this core's band), and
+//   - lda is divisible by ncores (otherwise the trailing rows belong to no core).
+// None of these are checked here; confirm against the caller.
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+ int i, j, k;
+
+ // i walks the output columns two at a time.
+ for (i = 0; i < lda; i += 2) {
+ // j walks this core's band of output rows four at a time.
+ for (j = coreid * (lda / ncores); j < (coreid + 1) * (lda / ncores); j += 4) {
+ // Accumulators for the 4x2 output tile, named c<row offset><column offset>;
+ // reset to zero for every tile. `register` is only a hint to the compiler.
+ register data_t c00 = 0, c01 = 0;
+ register data_t c10 = 0, c11 = 0;
+ register data_t c20 = 0, c21 = 0;
+ register data_t c30 = 0, c31 = 0;
+ 
+ register data_t a0, a1, a2, a3, b0, b1;
+ // Full-length dot products: each k step loads 4 values of A and 2 of B
+ // and uses them for all 8 multiply-accumulates of the tile.
+ for (k = 0; k < lda; k++) {
+ // Element k of rows j, j+1, j+2, j+3 of A.
+ a0 = A[j*lda + k + 0*lda];
+ a1 = A[j*lda + k + 1*lda];
+ a2 = A[j*lda + k + 2*lda];
+ a3 = A[j*lda + k + 3*lda];
+ 
+ // Row k of B at columns i and i+1.
+ b0 = B[k*lda + i + 0];
+ b1 = B[k*lda + i + 1];
+ 
+ c00 += a0 * b0; c01 += a0 * b1;
+ c10 += a1 * b0; c11 += a1 * b1;
+ c20 += a2 * b0; c21 += a2 * b1;
+ c30 += a3 * b0; c31 += a3 * b1;
+ }
+ 
+ // Store the finished tile: rows j..j+3, columns i and i+1 of C.
+ // Plain assignment, so any previous contents of C are discarded.
+ C[i + j*lda + 0 + 0*lda] = c00; C[i + j*lda + 1 + 0*lda] = c01;
+ C[i + j*lda + 0 + 1*lda] = c10; C[i + j*lda + 1 + 1*lda] = c11;
+ C[i + j*lda + 0 + 2*lda] = c20; C[i + j*lda + 1 + 2*lda] = c21;
+ C[i + j*lda + 0 + 3*lda] = c30; C[i + j*lda + 1 + 3*lda] = c31;
+ }
+ }
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul_msi(const int lda, const data_t A[], const data_t B[], data_t C[] ) {
- int i, j, k;
-
- for (i = 0; i < lda; i += 2) {
- for (j = coreid * (lda / ncores); j < (coreid + 1) * (lda / ncores); j += 4) {
- //for (j = 0; j < lda; j += 4) {
- register data_t c00 = 0, c01 = 0;
- register data_t c10 = 0, c11 = 0;
- register data_t c20 = 0, c21 = 0;
- register data_t c30 = 0, c31 = 0;
-
- register data_t a0, a1, a2, a3, b0, b1;
- for (k = 0; k < lda; k++) {
- a0 = A[j*lda + k + 0*lda];
- a1 = A[j*lda + k + 1*lda];
- a2 = A[j*lda + k + 2*lda];
- a3 = A[j*lda + k + 3*lda];
-
- b0 = B[k*lda + i + 0];
- b1 = B[k*lda + i + 1];
- /*if (coreid == 0) {
- printf("i = %d; j = %d; k = %d\n", i, j, k);
- printf("%d += %d * %d; %d += %d * %d\n", (int)c00, (int)a0, (int)b0, (int)c01, (int)a0, (int)b1);
- printf("%d += %d * %d; %d += %d * %d\n", (int)c10, (int)a1, (int)b0, (int)c11, (int)a1, (int)b1);
- printf("%d += %d * %d; %d += %d * %d\n", (int)c20, (int)a2, (int)b0, (int)c21, (int)a2, (int)b1);
- printf("%d += %d * %d; %d += %d * %d\n", (int)c30, (int)a3, (int)b0, (int)c31, (int)a3, (int)b1);
- printf("\n");
- }*/
-
- c00 += a0 * b0; c01 += a0 * b1;
- c10 += a1 * b0; c11 += a1 * b1;
- c20 += a2 * b0; c21 += a2 * b1;
- c30 += a3 * b0; c31 += a3 * b1;
- }
-
- C[i + j*lda + 0 + 0*lda] = c00; C[i + j*lda + 1 + 0*lda] = c01;
- C[i + j*lda + 0 + 1*lda] = c10; C[i + j*lda + 1 + 1*lda] = c11;
- C[i + j*lda + 0 + 2*lda] = c20; C[i + j*lda + 1 + 2*lda] = c21;
- C[i + j*lda + 0 + 3*lda] = c30; C[i + j*lda + 1 + 3*lda] = c31;
- }
- }
-}
-
-void __attribute__((noinline)) matmul_mi(const int lda, const data_t A[], const data_t B[], data_t C[] ) {
- int i, j, k;
-
- int curhalf = coreid;
- for (i = 0; i < lda; i += 2) {
- for (j = coreid * (lda / ncores); j < (coreid + 1) * (lda / ncores); j += 4) {
- register float c00 = 0, c01 = 0;
- register float c10 = 0, c11 = 0;
- register float c20 = 0, c21 = 0;
- register float c30 = 0, c31 = 0;
-
- register float a0, a1, a2, a3, b0, b1;
- for (k = curhalf * (lda/2); k < curhalf * (lda/2) + (lda/2); k++) {
- a0 = A[j*lda + k + 0*lda];
- a1 = A[j*lda + k + 1*lda];
- a2 = A[j*lda + k + 2*lda];
- a3 = A[j*lda + k + 3*lda];
-
- b0 = B[k*lda + i + 0];
- b1 = B[k*lda + i + 1];
-
- c00 += a0 * b0; c01 += a0 * b1;
- c10 += a1 * b0; c11 += a1 * b1;
- c20 += a2 * b0; c21 += a2 * b1;
- c30 += a3 * b0; c31 += a3 * b1;
- }
-
- C[i + j*lda + 0 + 0*lda] += c00; C[i + j*lda + 1 + 0*lda] += c01;
- C[i + j*lda + 0 + 1*lda] += c10; C[i + j*lda + 1 + 1*lda] += c11;
- C[i + j*lda + 0 + 2*lda] += c20; C[i + j*lda + 1 + 2*lda] += c21;
- C[i + j*lda + 0 + 3*lda] += c30; C[i + j*lda + 1 + 3*lda] += c31;
- }
- }
-
- barrier(ncores);
- curhalf++;
- curhalf %= ncores;
-
- for (i = 0; i < lda; i += 2) {
- for (j = coreid * (lda / ncores); j < (coreid + 1) * (lda / ncores); j += 4) {
- register float c00 = 0, c01 = 0;
- register float c10 = 0, c11 = 0;
- register float c20 = 0, c21 = 0;
- register float c30 = 0, c31 = 0;
-
- register float a0, a1, a2, a3, b0, b1;
- for (k = curhalf * (lda/2); k < curhalf * (lda/2) + (lda/2); k++) {
- a0 = A[j*lda + k + 0*lda];
- a1 = A[j*lda + k + 1*lda];
- a2 = A[j*lda + k + 2*lda];
- a3 = A[j*lda + k + 3*lda];
-
- b0 = B[k*lda + i + 0];
- b1 = B[k*lda + i + 1];
-
- c00 += a0 * b0; c01 += a0 * b1;
- c10 += a1 * b0; c11 += a1 * b1;
- c20 += a2 * b0; c21 += a2 * b1;
- c30 += a3 * b0; c31 += a3 * b1;
- }
-
- C[i + j*lda + 0 + 0*lda] += c00; C[i + j*lda + 1 + 0*lda] += c01;
- C[i + j*lda + 0 + 1*lda] += c10; C[i + j*lda + 1 + 1*lda] += c11;
- C[i + j*lda + 0 + 2*lda] += c20; C[i + j*lda + 1 + 2*lda] += c21;
- C[i + j*lda + 0 + 3*lda] += c30; C[i + j*lda + 1 + 3*lda] += c31;
- }
- }
-}
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- matmul_msi(lda, A, B, C);
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul_msi(const int lda, const data_t A[], const data_t B[], data_t C[] ) {
- int i, j, k;
-
- for (i = 0; i < lda; i += 2) {
- for (j = coreid * (lda / ncores); j < (coreid + 1) * (lda / ncores); j += 4) {
- //for (j = 0; j < lda; j += 4) {
- register data_t c00 = 0, c01 = 0;
- register data_t c10 = 0, c11 = 0;
- register data_t c20 = 0, c21 = 0;
- register data_t c30 = 0, c31 = 0;
-
- register data_t a0, a1, a2, a3, b0, b1;
- for (k = 0; k < lda; k++) {
- a0 = A[j*lda + k + 0*lda];
- a1 = A[j*lda + k + 1*lda];
- a2 = A[j*lda + k + 2*lda];
- a3 = A[j*lda + k + 3*lda];
-
- b0 = B[k*lda + i + 0];
- b1 = B[k*lda + i + 1];
- /*if (coreid == 0) {
- printf("i = %d; j = %d; k = %d\n", i, j, k);
- printf("%d += %d * %d; %d += %d * %d\n", (int)c00, (int)a0, (int)b0, (int)c01, (int)a0, (int)b1);
- printf("%d += %d * %d; %d += %d * %d\n", (int)c10, (int)a1, (int)b0, (int)c11, (int)a1, (int)b1);
- printf("%d += %d * %d; %d += %d * %d\n", (int)c20, (int)a2, (int)b0, (int)c21, (int)a2, (int)b1);
- printf("%d += %d * %d; %d += %d * %d\n", (int)c30, (int)a3, (int)b0, (int)c31, (int)a3, (int)b1);
- printf("\n");
- }*/
-
- c00 += a0 * b0; c01 += a0 * b1;
- c10 += a1 * b0; c11 += a1 * b1;
- c20 += a2 * b0; c21 += a2 * b1;
- c30 += a3 * b0; c31 += a3 * b1;
- }
-
- C[i + j*lda + 0 + 0*lda] = c00; C[i + j*lda + 1 + 0*lda] = c01;
- C[i + j*lda + 0 + 1*lda] = c10; C[i + j*lda + 1 + 1*lda] = c11;
- C[i + j*lda + 0 + 2*lda] = c20; C[i + j*lda + 1 + 2*lda] = c21;
- C[i + j*lda + 0 + 3*lda] = c30; C[i + j*lda + 1 + 3*lda] = c31;
- }
- }
-}
-
-void __attribute__((noinline)) matmul_mi(const int lda, const data_t A[], const data_t B[], data_t C[] ) {
- int i, j, k;
-
- int curhalf = coreid;
- for (i = 0; i < lda; i += 2) {
- for (j = coreid * (lda / ncores); j < (coreid + 1) * (lda / ncores); j += 4) {
- register float c00 = 0, c01 = 0;
- register float c10 = 0, c11 = 0;
- register float c20 = 0, c21 = 0;
- register float c30 = 0, c31 = 0;
-
- register float a0, a1, a2, a3, b0, b1;
- for (k = curhalf * (lda/2); k < curhalf * (lda/2) + (lda/2); k++) {
- a0 = A[j*lda + k + 0*lda];
- a1 = A[j*lda + k + 1*lda];
- a2 = A[j*lda + k + 2*lda];
- a3 = A[j*lda + k + 3*lda];
-
- b0 = B[k*lda + i + 0];
- b1 = B[k*lda + i + 1];
-
- c00 += a0 * b0; c01 += a0 * b1;
- c10 += a1 * b0; c11 += a1 * b1;
- c20 += a2 * b0; c21 += a2 * b1;
- c30 += a3 * b0; c31 += a3 * b1;
- }
-
- C[i + j*lda + 0 + 0*lda] += c00; C[i + j*lda + 1 + 0*lda] += c01;
- C[i + j*lda + 0 + 1*lda] += c10; C[i + j*lda + 1 + 1*lda] += c11;
- C[i + j*lda + 0 + 2*lda] += c20; C[i + j*lda + 1 + 2*lda] += c21;
- C[i + j*lda + 0 + 3*lda] += c30; C[i + j*lda + 1 + 3*lda] += c31;
- }
- }
-
- barrier(nc);
- curhalf++;
- curhalf %= ncores;
-
- for (i = 0; i < lda; i += 2) {
- for (j = coreid * (lda / ncores); j < (coreid + 1) * (lda / ncores); j += 4) {
- register float c00 = 0, c01 = 0;
- register float c10 = 0, c11 = 0;
- register float c20 = 0, c21 = 0;
- register float c30 = 0, c31 = 0;
-
- register float a0, a1, a2, a3, b0, b1;
- for (k = curhalf * (lda/2); k < curhalf * (lda/2) + (lda/2); k++) {
- a0 = A[j*lda + k + 0*lda];
- a1 = A[j*lda + k + 1*lda];
- a2 = A[j*lda + k + 2*lda];
- a3 = A[j*lda + k + 3*lda];
-
- b0 = B[k*lda + i + 0];
- b1 = B[k*lda + i + 1];
-
- c00 += a0 * b0; c01 += a0 * b1;
- c10 += a1 * b0; c11 += a1 * b1;
- c20 += a2 * b0; c21 += a2 * b1;
- c30 += a3 * b0; c31 += a3 * b1;
- }
-
- C[i + j*lda + 0 + 0*lda] += c00; C[i + j*lda + 1 + 0*lda] += c01;
- C[i + j*lda + 0 + 1*lda] += c10; C[i + j*lda + 1 + 1*lda] += c11;
- C[i + j*lda + 0 + 2*lda] += c20; C[i + j*lda + 1 + 2*lda] += c21;
- C[i + j*lda + 0 + 3*lda] += c30; C[i + j*lda + 1 + 3*lda] += c31;
- }
- }
-}
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- matmul_mi(lda, A, B, C);
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- for (i = coreid * (n / ncores); i < (coreid + 1) * (n / ncores); i++) {
- x[i] = x[i] + y[i];
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+
+/* Register block held in locals by matmul(): REG_I columns by REG_J rows of C. */
+#define REG_I 8
+#define REG_J 2
+/* BLOCK_I is not used: matmul() does not block the i (column) dimension. */
+//#define BLOCK_I 32
+/* Block sizes for the j (row) and k (inner product) loops. */
+#define BLOCK_J 16
+#define BLOCK_K 16
+/* The only configuration that takes the optimized path in matmul(). */
+#define LDA 32
+#define NCORES 2
+/*
+ * Smaller of two values.  Each argument is parenthesized so that operands
+ * containing lower-precedence operators (e.g. MIN(a | b, c)) are compared as
+ * a whole.  Arguments are evaluated more than once: do not pass expressions
+ * with side effects.
+ */
+#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
+
+/*
+ * matmul - multi-core matrix multiply that accumulates A * B into C.
+ *
+ * All three matrices are lda x lda and stored row by row: element (row, col)
+ * is at index row*lda + col.  The product is added to whatever C already
+ * holds, so the caller must zero C beforehand to obtain a plain product.
+ *
+ *   coreid  index of the calling core
+ *   ncores  number of cores calling this function
+ *   lda     matrix dimension
+ *   A, B    input matrices (read only)
+ *   C       result matrix (updated in place)
+ *
+ * When ncores == NCORES and lda == LDA, the rows of C are split between the
+ * cores (LDA / NCORES rows each) and every core runs a blocked kernel on its
+ * own rows, keeping a REG_J x REG_I block of C in locals while it walks k.
+ * For any other configuration core 0 computes the whole product with the
+ * naive triple loop and every other core returns without doing any work.
+ */
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+  /*
+   * The compute step below is unrolled by hand for an 8 x 2 register block,
+   * and the row/column loops step by whole register blocks.  Refuse to build
+   * if the tuning constants stop matching those assumptions instead of
+   * silently reading and writing out of bounds.
+   */
+#if REG_I != 8 || REG_J != 2
+#error "the unrolled kernel in matmul() is written for REG_I == 8 and REG_J == 2"
+#endif
+#if (LDA % REG_I) != 0 || (LDA % (NCORES * REG_J)) != 0
+#error "LDA must be a multiple of REG_I and of NCORES * REG_J"
+#endif
+  int i, j, k, ri, rj, jj, kk;
+
+  if (ncores == NCORES && lda == LDA) {
+    /* Rows [start, end) of C belong to this core; the last core runs to LDA. */
+    const int start = coreid * (LDA / NCORES);
+    const int end = (coreid == NCORES - 1) ? LDA : (coreid + 1) * (LDA / NCORES);
+    data_t c[REG_I][REG_J], a[REG_J], b[REG_I];
+
+    for (jj = start; jj < end; jj += BLOCK_J) {
+      const int j_stop = MIN(end, jj + BLOCK_J);
+
+      for (kk = 0; kk < LDA; kk += BLOCK_K) {
+        const int k_stop = MIN(LDA, kk + BLOCK_K);
+
+        for (j = jj; j < j_stop; j += REG_J) {
+          const data_t *Aj = A + j*LDA;   /* row j of A */
+          data_t *Cj = C + j*LDA;         /* row j of C */
+
+          for (i = 0; i < LDA; i += REG_I) {
+            const data_t *Bi = B + i;     /* column i of B, row 0 */
+
+            /* Load the register block of C: rows j..j+REG_J-1, columns i..i+REG_I-1. */
+            for (ri = 0; ri < REG_I; ri++) {
+              for (rj = 0; rj < REG_J; rj++) {
+                c[ri][rj] = Cj[i + ri + rj*LDA];
+              }
+            }
+
+            /* Accumulate this k block into the register block, k ascending. */
+            for (k = kk; k < k_stop; k++) {
+              const data_t *Bk = Bi + k*LDA;   /* B[k][i .. i+REG_I-1] */
+
+              a[0] = Aj[k];
+              a[1] = Aj[k + LDA];
+              b[0] = Bk[0];
+              b[1] = Bk[1];
+              b[2] = Bk[2];
+              b[3] = Bk[3];
+              b[4] = Bk[4];
+              b[5] = Bk[5];
+              b[6] = Bk[6];
+              b[7] = Bk[7];
+
+              /* Deliberately unrolled; see the REG_I/REG_J guard above. */
+              c[0][0] += b[0] * a[0];
+              c[0][1] += b[0] * a[1];
+              c[1][0] += b[1] * a[0];
+              c[1][1] += b[1] * a[1];
+              c[2][0] += b[2] * a[0];
+              c[2][1] += b[2] * a[1];
+              c[3][0] += b[3] * a[0];
+              c[3][1] += b[3] * a[1];
+              c[4][0] += b[4] * a[0];
+              c[4][1] += b[4] * a[1];
+              c[5][0] += b[5] * a[0];
+              c[5][1] += b[5] * a[1];
+              c[6][0] += b[6] * a[0];
+              c[6][1] += b[6] * a[1];
+              c[7][0] += b[7] * a[0];
+              c[7][1] += b[7] * a[1];
+            }
+
+            /* Store the register block back to C. */
+            for (ri = 0; ri < REG_I; ri++) {
+              for (rj = 0; rj < REG_J; rj++) {
+                Cj[i + ri + rj*LDA] = c[ri][rj];
+              }
+            }
+          }
+        }
+      }
+    }
+  } else {
+    /*
+     * We only care about performance for LDA x LDA matrices on NCORES cores.
+     * Anything else is handled by core 0 alone with the naive algorithm.
+     */
+    if (coreid > 0) {
+      return;
+    }
+
+    for ( i = 0; i < lda; i++ ) {
+      for ( j = 0; j < lda; j++ ) {
+        for ( k = 0; k < lda; k++ ) {
+          C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
+        }
+      }
+    }
+  }
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-#define REG_I 8
-#define REG_J 2
-//#define BLOCK_I 32
-#define BLOCK_J 16
-#define BLOCK_K 16
-#define LDA 32
-#define NCORES 2
-#define MIN(X,Y) (X < Y ? X : Y)
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
- int i, j, k, ri, rj, ii, jj, kk;
- data_t *Aj, *Cj, *Bi;
- data_t c[REG_I][REG_J], a[REG_J], b[REG_I];
- size_t start = coreid * (LDA / NCORES), end = (coreid == NCORES - 1 ? LDA : (coreid + 1) * (LDA / NCORES));
-
- /* if (coreid > 0) { */
- /* return; */
- /* } */
- /* start = 0, end = lda; */
- if (ncores == NCORES && lda == LDA) {
- for (jj = start; jj < end; jj += BLOCK_J)
- for (kk = 0; kk < LDA; kk += BLOCK_K)
- //for (ii = 0; ii < LDA; ii += BLOCK_I)
- for (j = jj; j < MIN(end, jj + BLOCK_J); j += REG_J) {
- Aj = A + j*LDA;
- Cj = C + j*LDA;
- for (i = 0; i < LDA; i += REG_I) {
- /* Load C in register blocks. */
- Bi = B + i;
- for (ri = 0; ri < REG_I; ri++) {
- for (rj = 0; rj < REG_J; rj++) {
- c[ri][rj] = Cj[i + ri + ( rj)*LDA];
- }
- }
-
-
- for (k = kk; k < MIN(LDA, kk + BLOCK_K); k++) {
- /* Load a,b in register blocks. */
- /* for (rj = 0; rj < REG_J; rj++) {
- a[rj] = A[(j + rj)*LDA + k];
- }*/
- /* for (ri = 0; ri < REG_I; ri++) { */
- /* b[ri] = Bi[k*LDA + ri]; */
- /* } */
- /* /\* Compute C in register blocks. *\/ */
- /* for (rj = 0; rj < REG_J; rj++) { */
- /* a[rj] = Aj[( rj)*LDA + k]; */
- /* for (ri = 0; ri < REG_I; ri++) { */
- /* c[ri][rj] += a[rj] * b[ri]; */
- /* } */
- /* } */
- a[0] = Aj[k];
- a[1] = Aj[k + LDA];
- b[0] = Bi[k*LDA];
- b[1] = Bi[k*LDA + 1];
- b[2] = Bi[k*LDA + 2];
- b[3] = Bi[k*LDA + 3];
- b[4] = Bi[k*LDA + 4];
- b[5] = Bi[k*LDA + 5];
- b[6] = Bi[k*LDA + 6];
- b[7] = Bi[k*LDA + 7];
-
-
- c[0][0] += b[0] * a[0];
- c[0][1] += b[0] * a[1];
- c[1][0] += b[1] * a[0];
- c[1][1] += b[1] * a[1];
- c[2][0] += b[2] * a[0];
- c[2][1] += b[2] * a[1];
- c[3][0] += b[3] * a[0];
- c[3][1] += b[3] * a[1];
- c[4][0] += b[4] * a[0];
- c[4][1] += b[4] * a[1];
- c[5][0] += b[5] * a[0];
- c[5][1] += b[5] * a[1];
- c[6][0] += b[6] * a[0];
- c[6][1] += b[6] * a[1];
- c[7][0] += b[7] * a[0];
- c[7][1] += b[7] * a[1];
-
-
- /* c[0][0] += b[0] * a[0]; */
- /* c[1][1] += b[1] * a[1]; */
- /* c[2][0] += b[2] * a[0]; */
- /* c[3][1] += b[3] * a[1]; */
- /* c[4][0] += b[4] * a[0]; */
- /* c[5][1] += b[5] * a[1]; */
- /* c[6][0] += b[6] * a[0]; */
- /* c[7][1] += b[7] * a[1]; */
- /* c[0][0] += b[0] * a[0]; */
- /* c[1][1] += b[1] * a[1]; */
- /* c[2][0] += b[2] * a[0]; */
- /* c[3][1] += b[3] * a[1]; */
- /* c[4][0] += b[4] * a[0]; */
- /* c[5][1] += b[5] * a[1]; */
- /* c[6][0] += b[6] * a[0]; */
- /* c[7][1] += b[7] * a[1]; */
-
- }
-
- /* store C in register blocks. */
- for (ri = 0; ri < REG_I; ri++) {
- for (rj = 0; rj < REG_J; rj++) {
- Cj[i + ri + (rj)*LDA] = c[ri][rj];
- }
- }
- }
-
-
-
-
- }
- /* We only care about performance for 32x32 matrices and 2 cores. Otherwise just naive mat_mul */
- } else {
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- for ( k = 0; k < lda; k++ )
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// /* // Execute the provided, naive matmul */
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-#define REG_I 8
-#define REG_J 2
-#define BLOCK_I 32
-#define BLOCK_J 16
-#define BLOCK_K 16
-#define LDA 32
-#define NCORES 2
-#define MIN(X,Y) (X < Y ? X : Y)
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
- int i, j, k, ri, rj, ii, jj, kk;
- data_t *Aj, *Cj, *Bi;
- data_t c[REG_I][REG_J], a[REG_J], b[REG_I];
- size_t start = coreid * (LDA / NCORES), end = (coreid == NCORES - 1 ? LDA : (coreid + 1) * (LDA / NCORES));
-
- /* if (coreid > 0) { */
- /* return; */
- /* } */
- /* start = 0, end = lda; */
- if (ncores == NCORES && lda == LDA) {
- for (jj = start; jj < end; jj += BLOCK_J) {
- int kk_start= (coreid == 0 ? 0 : LDA/2) ,kk_end = (coreid == 0 ? LDA/2 : LDA);
- for (kk = kk_start; kk < kk_end; kk += BLOCK_K) {
- // for (ii = 0; ii < LDA; ii += BLOCK_I)
- for (j = jj; j < MIN(end, jj + BLOCK_J); j += REG_J) {
- Aj = A + j*LDA;
- Cj = C + j*LDA;
- for (i = 0; i < LDA/*, ii + BLOCK_I)*/; i += REG_I) {
- /* Load C in register blocks. */
- Bi = B + i;
- for (ri = 0; ri < REG_I; ri++) {
- for (rj = 0; rj < REG_J; rj++) {
- c[ri][rj] = Cj[i + ri + ( rj)*LDA];
- }
- }
-
-
- for (k = kk; k < MIN(LDA, kk + BLOCK_K); k++) {
- for (ri = 0; ri < REG_I; ri++) {
- b[ri] = Bi[k*LDA + ri];
- }
- /* Compute C in register blocks. */
- for (rj = 0; rj < REG_J; rj++) {
- a[rj] = Aj[(rj)*LDA + k];
- for (ri = 0; ri < REG_I; ri++) {
- c[ri][rj] += a[rj] * b[ri];
- }
- }
- }
-
- /* store C in register blocks. */
- for (ri = 0; ri < REG_I; ri++) {
- for (rj = 0; rj < REG_J; rj++) {
- Cj[i + ri + ( rj)*LDA] = c[ri][rj];
- }
- }
- }
- }
- /* barrier(nc); */
-
- /* kk_start= (coreid == 1 ? 0 : LDA/2); */
- /* kk_end = (coreid == 1 ? LDA/2 : LDA); */
- /* for (kk = kk_start; kk < kk_end; kk += BLOCK_K) { */
- /* // for (ii = 0; ii < LDA; ii += BLOCK_I) */
- /* for (j = jj; j < MIN(end, jj + BLOCK_J); j += REG_J) { */
- /* Aj = A + j*LDA; */
- /* Cj = C + j*LDA; */
- /* for (i = 0; i < LDA/\*, ii + BLOCK_I)*\/; i += REG_I) { */
- /* /\* Load C in register blocks. *\/ */
- /* Bi = B + i; */
- /* for (ri = 0; ri < REG_I; ri++) { */
- /* for (rj = 0; rj < REG_J; rj++) { */
- /* c[ri][rj] = Cj[i + ri + ( rj)*LDA]; */
- /* } */
- /* } */
-
-
- /* for (k = kk; k < MIN(LDA, kk + BLOCK_K); k++) { */
- /* for (ri = 0; ri < REG_I; ri++) { */
- /* b[ri] = Bi[k*LDA + ri]; */
- /* } */
- /* /\* Compute C in register blocks. *\/ */
- /* for (rj = 0; rj < REG_J; rj++) { */
- /* a[rj] = Aj[(rj)*LDA + k]; */
- /* for (ri = 0; ri < REG_I; ri++) { */
- /* c[ri][rj] += a[rj] * b[ri]; */
- /* } */
- /* } */
- /* } */
-
- /* store C in register blocks. */
- /* for (ri = 0; ri < REG_I; ri++) { */
- /* for (rj = 0; rj < REG_J; rj++) { */
- /* Cj[i + ri + ( rj)*LDA] = c[ri][rj]; */
- /* } */
- /* } */
- /* } */
- /* } */
- }
- }
-
-
- //barrier(nc);
- for (jj = start; jj < end; jj += BLOCK_J) {
- int kk_start= (coreid != 0 ? 0 : LDA/2), kk_end = (coreid != 0 ? LDA/2 : LDA);
- for (kk = kk_start; kk < kk_end; kk += BLOCK_K) {
- // for (ii = 0; ii < LDA; ii += BLOCK_I)
- for (j = jj; j < MIN(end, jj + BLOCK_J); j += REG_J) {
- Aj = A + j*LDA;
- Cj = C + j*LDA;
- for (i = 0; i < LDA/*, ii + BLOCK_I)*/; i += REG_I) {
- /* Load C in register blocks. */
- Bi = B + i;
- for (ri = 0; ri < REG_I; ri++) {
- for (rj = 0; rj < REG_J; rj++) {
- c[ri][rj] = Cj[i + ri + ( rj)*LDA];
- }
- }
-
-
- for (k = kk; k < MIN(LDA, kk + BLOCK_K); k++) {
- for (ri = 0; ri < REG_I; ri++) {
- b[ri] = Bi[k*LDA + ri];
- }
- /* Compute C in register blocks. */
- for (rj = 0; rj < REG_J; rj++) {
- a[rj] = Aj[(rj)*LDA + k];
- for (ri = 0; ri < REG_I; ri++) {
- c[ri][rj] += a[rj] * b[ri];
- }
- }
- }
-
- /* store C in register blocks. */
- for (ri = 0; ri < REG_I; ri++) {
- for (rj = 0; rj < REG_J; rj++) {
- Cj[i + ri + ( rj)*LDA] = c[ri][rj];
- }
- }
- }
- }
- }
- }
- /* We only care about performance for 32x32 matrices and 2 cores. Otherwise just naive mat_mul */
-} else {
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- for ( k = 0; k < lda; k++ )
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// /* // Execute the provided, naive matmul */
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-#define MIN(X,Y) (X < Y ? X : Y)
-#define MAX(X,Y) (X > Y ? X : Y)
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- size_t i, start = coreid * (n / ncores), end = (coreid == ncores - 1 ? n : (coreid + 1) * (n / ncores));
-
- for (i = start; i < end; i++) {
- x[i] += y[i];
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
- int i, j, k , jj , kk;
- int start_i = coreid*lda/2;
- int end_i = start_i + lda/2;
- int step_j, step_k;
- int start_k, end_k, start_j, end_j;
- int j_lda;
- int pos_A , pos_B, pos_C;
- data_t temp00, temp01,temp02,temp03,temp04,temp05,temp06,temp07;
- data_t temp10, temp11,temp12,temp13,temp14,temp15,temp16,temp17;
- data_t temp_A0, temp_A1, temp_A2, temp_A3, temp_A4, temp_A5, temp_A6, temp_A7;
-
- temp00 = 0;
- temp01 = 0;
- temp02 = 0;
- temp03 = 0;
- temp04 = 0;
- temp05 = 0;
- temp06 = 0;
- temp07 = 0;
-
- temp10 = 0;
- temp11 = 0;
- temp12 = 0;
- temp13 = 0;
- temp14 = 0;
- temp15 = 0;
- temp16 = 0;
- temp17 = 0;
-
- if (coreid == 0)
- {
- step_k = 1;
- start_k= 0;
- end_k = lda;
-
- step_j = 2;
- start_j= 0;
- end_j = lda;
-
- }else
- {
-
- step_k = -1;
- start_k = lda-1;
- end_k = -1;
-
- step_j = -2;
- start_j= lda-2;
- end_j = -2;
- }
-
- for( kk = start_k ; kk!= end_k ; kk+=(step_k*16) )
- {
- for( jj = start_j ; jj!= end_j ; jj+=(step_j*8) )
- {
- for ( i = start_i; i < end_i; i+=8 )
- {
- //pos_C = i + jj*lda;
- for ( j = jj; j != (jj+(step_j*8)) ; j+=step_j )
- {
-
- pos_C = i + j*lda;
- temp00 = C[(pos_C + 0)];
- temp01 = C[(pos_C + 1)];
- temp02 = C[(pos_C + 2)];
- temp03 = C[(pos_C + 3)];
- temp04 = C[(pos_C + 4)];
- temp05 = C[(pos_C + 5)];
- temp06 = C[(pos_C + 6)];
- temp07 = C[(pos_C + 7)];
-
- //pos_C += lda;
- pos_C = i + (j+1)*lda;
-
- temp10 = C[(pos_C + 0)];
- temp11 = C[(pos_C + 1)];
- temp12 = C[(pos_C + 2)];
- temp13 = C[(pos_C + 3)];
- temp14 = C[(pos_C + 4)];
- temp15 = C[(pos_C + 5)];
- temp16 = C[(pos_C + 6)];
- temp17 = C[(pos_C + 7)];
-
- pos_B = kk*lda + i;
- pos_A = j*lda + kk;
- for ( k = kk; k != (kk+(step_k*16)) ; k+=step_k )
- {
- temp_A0 = A[ pos_A ] ;
- temp_A1 = A[pos_A +lda];
-
- temp00 += temp_A0 * B[(pos_B + 0)];
- temp01 += temp_A0 * B[(pos_B + 1)];
- temp02 += temp_A0 * B[(pos_B + 2)];
- temp03 += temp_A0 * B[(pos_B + 3)];
- temp04 += temp_A0 * B[(pos_B + 4)];
- temp05 += temp_A0 * B[(pos_B + 5)];
- temp06 += temp_A0 * B[(pos_B + 6)];
- temp07 += temp_A0 * B[(pos_B + 7)];
-
- temp10 += temp_A1 * B[(pos_B + 0)];
- temp11 += temp_A1 * B[(pos_B + 1)];
- temp12 += temp_A1 * B[(pos_B + 2)];
- temp13 += temp_A1 * B[(pos_B + 3)];
- temp14 += temp_A1 * B[(pos_B + 4)];
- temp15 += temp_A1 * B[(pos_B + 5)];
- temp16 += temp_A1 * B[(pos_B + 6)];
- temp17 += temp_A1 * B[(pos_B + 7)];
-
- pos_B += (lda*step_k) ;
- pos_A += step_k;
- }
- //barrier(nc);
-
- C[(pos_C + 0)] = temp10;
- C[(pos_C + 1)] = temp11;
- C[(pos_C + 2)] = temp12;
- C[(pos_C + 3)] = temp13;
- C[(pos_C + 4)] = temp14;
- C[(pos_C + 5)] = temp15;
- C[(pos_C + 6)] = temp16;
- C[(pos_C + 7)] = temp17;
- //barrier(nc);
-
- pos_C = i + j*lda;
- //pos_C -= lda;
- C[(pos_C + 0)] = temp00;
- C[(pos_C + 1)] = temp01;
- C[(pos_C + 2)] = temp02;
- C[(pos_C + 3)] = temp03;
- C[(pos_C + 4)] = temp04;
- C[(pos_C + 5)] = temp05;
- C[(pos_C + 6)] = temp06;
- C[(pos_C + 7)] = temp07;
- //barrier(nc);
- //pos_C += step_j * lda;
- }
- //barrier(nc);
- }
- //barrier(nc);
-
- }
- //barrier(nc);
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
- /*
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-
- */
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
-
- //printf("input1_data");
-exit(0);
-
-}
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
- int i, j, k , jj , kk;
- int start_i = coreid*lda/2;
- int end_i = start_i + lda/2;
- int step_j, step_k;
- int start_k, end_k, start_j, end_j;
- int j_lda;
- int pos_A , pos_B, pos_C;
- data_t temp00, temp01,temp02,temp03,temp04,temp05,temp06,temp07;
- data_t temp10, temp11,temp12,temp13,temp14,temp15,temp16,temp17;
- data_t temp_A0, temp_A1, temp_A2, temp_A3, temp_A4, temp_A5, temp_A6, temp_A7;
-
- temp00 = 0;
- temp01 = 0;
- temp02 = 0;
- temp03 = 0;
- temp04 = 0;
- temp05 = 0;
- temp06 = 0;
- temp07 = 0;
-
- temp10 = 0;
- temp11 = 0;
- temp12 = 0;
- temp13 = 0;
- temp14 = 0;
- temp15 = 0;
- temp16 = 0;
- temp17 = 0;
-
- if (coreid == 0)
- {
- step_k = 1;
- start_k= 0;
- end_k = lda;
-
- step_j = 2;
- start_j= 0;
- end_j = lda;
-
- }else
- {
-
- step_k = -1;
- start_k = lda-1;
- end_k = -1;
-
- step_j = -2;
- start_j= lda-2;
- end_j = -2;
- }
-
- for( kk = start_k ; kk!= end_k ; kk+=(step_k*16) )
- {
- for( jj = start_j ; jj!= end_j ; jj+=(step_j*8) )
- {
- for ( i = start_i; i < end_i; i+=8 )
- {
- //pos_C = i + jj*lda;
- for ( j = jj; j != (jj+(step_j*8)) ; j+=step_j )
- {
-
- pos_C = i + j*lda;
- temp00 = C[(pos_C + 0)];
- temp01 = C[(pos_C + 1)];
- temp02 = C[(pos_C + 2)];
- temp03 = C[(pos_C + 3)];
- temp04 = C[(pos_C + 4)];
- temp05 = C[(pos_C + 5)];
- temp06 = C[(pos_C + 6)];
- temp07 = C[(pos_C + 7)];
-
- //pos_C += lda;
- pos_C = i + (j+1)*lda;
-
- temp10 = C[(pos_C + 0)];
- temp11 = C[(pos_C + 1)];
- temp12 = C[(pos_C + 2)];
- temp13 = C[(pos_C + 3)];
- temp14 = C[(pos_C + 4)];
- temp15 = C[(pos_C + 5)];
- temp16 = C[(pos_C + 6)];
- temp17 = C[(pos_C + 7)];
-
- pos_B = kk*lda + i;
- pos_A = j*lda + kk;
- for ( k = kk; k != (kk+(step_k*16)) ; k+=step_k )
- {
- temp_A0 = A[ pos_A ] ;
- temp_A1 = A[pos_A +lda];
-
- temp00 += temp_A0 * B[(pos_B + 0)];
- temp01 += temp_A0 * B[(pos_B + 1)];
- temp02 += temp_A0 * B[(pos_B + 2)];
- temp03 += temp_A0 * B[(pos_B + 3)];
- temp04 += temp_A0 * B[(pos_B + 4)];
- temp05 += temp_A0 * B[(pos_B + 5)];
- temp06 += temp_A0 * B[(pos_B + 6)];
- temp07 += temp_A0 * B[(pos_B + 7)];
-
- temp10 += temp_A1 * B[(pos_B + 0)];
- temp11 += temp_A1 * B[(pos_B + 1)];
- temp12 += temp_A1 * B[(pos_B + 2)];
- temp13 += temp_A1 * B[(pos_B + 3)];
- temp14 += temp_A1 * B[(pos_B + 4)];
- temp15 += temp_A1 * B[(pos_B + 5)];
- temp16 += temp_A1 * B[(pos_B + 6)];
- temp17 += temp_A1 * B[(pos_B + 7)];
-
- pos_B += (lda*step_k) ;
- pos_A += step_k;
- }
- //barrier(nc);
-
- C[(pos_C + 0)] = temp10;
- C[(pos_C + 1)] = temp11;
- C[(pos_C + 2)] = temp12;
- C[(pos_C + 3)] = temp13;
- C[(pos_C + 4)] = temp14;
- C[(pos_C + 5)] = temp15;
- C[(pos_C + 6)] = temp16;
- C[(pos_C + 7)] = temp17;
- //barrier(nc);
-
- pos_C = i + j*lda;
- //pos_C -= lda;
- C[(pos_C + 0)] = temp00;
- C[(pos_C + 1)] = temp01;
- C[(pos_C + 2)] = temp02;
- C[(pos_C + 3)] = temp03;
- C[(pos_C + 4)] = temp04;
- C[(pos_C + 5)] = temp05;
- C[(pos_C + 6)] = temp06;
- C[(pos_C + 7)] = temp07;
- //barrier(nc);
- //pos_C += step_j * lda;
- }
- //barrier(nc);
- }
- //barrier(nc);
-
- }
- //barrier(nc);
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
- /*
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-
- */
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
-
- //printf("input1_data");
-exit(0);
-
-}
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- size_t i;
- size_t index;
- for (i = 0; i < (n/ncores); i++){
- index = i + coreid*(n/ncores);
- x[index] = x[index] + y[index];
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+// Number of adjacent result columns accumulated in scalar temporaries per pass.
+enum { MATMUL_STRIP = 16 };
+
+// Computes result columns [col0, col0 + MATMUL_STRIP) for every row of the
+// lda x lda product: C[c + j*lda] = sum over k of A[j*lda + k] * B[k*lda + c].
+// Each sum starts from zero and is stored once, so previous contents of C are
+// overwritten, not accumulated into. The sixteen accumulators are kept as
+// separate scalars, as in the original hand-unrolled code.
+static void matmul_strip16(const int lda, const data_t A[], const data_t B[], data_t C[], const int col0)
+{
+  int j, k;
+
+  for (j = 0; j < lda; j++) {
+    const data_t *a_row = &A[j*lda];
+    data_t *c_out = &C[col0 + j*lda];
+    data_t t0 = 0, t1 = 0, t2 = 0, t3 = 0, t4 = 0, t5 = 0, t6 = 0, t7 = 0;
+    data_t t8 = 0, t9 = 0, t10 = 0, t11 = 0, t12 = 0, t13 = 0, t14 = 0, t15 = 0;
+
+    for (k = 0; k < lda; k++) {
+      const data_t a = a_row[k];            // one A element feeds all 16 columns
+      const data_t *b = &B[col0 + k*lda];   // 16 consecutive elements of B row k
+
+      t0  += a * b[0];
+      t1  += a * b[1];
+      t2  += a * b[2];
+      t3  += a * b[3];
+      t4  += a * b[4];
+      t5  += a * b[5];
+      t6  += a * b[6];
+      t7  += a * b[7];
+      t8  += a * b[8];
+      t9  += a * b[9];
+      t10 += a * b[10];
+      t11 += a * b[11];
+      t12 += a * b[12];
+      t13 += a * b[13];
+      t14 += a * b[14];
+      t15 += a * b[15];
+    }
+
+    c_out[0]  = t0;
+    c_out[1]  = t1;
+    c_out[2]  = t2;
+    c_out[3]  = t3;
+    c_out[4]  = t4;
+    c_out[5]  = t5;
+    c_out[6]  = t6;
+    c_out[7]  = t7;
+    c_out[8]  = t8;
+    c_out[9]  = t9;
+    c_out[10] = t10;
+    c_out[11] = t11;
+    c_out[12] = t12;
+    c_out[13] = t13;
+    c_out[14] = t14;
+    c_out[15] = t15;
+  }
+}
+
+// Scalar fallback for the result columns [col0, col1) that do not fill a whole
+// MATMUL_STRIP-wide strip. Same contract as matmul_strip16: C is overwritten.
+static void matmul_tail(const int lda, const data_t A[], const data_t B[], data_t C[], const int col0, const int col1)
+{
+  int j, k, c;
+
+  for (j = 0; j < lda; j++) {
+    for (c = col0; c < col1; c++) {
+      data_t sum = 0;
+
+      for (k = 0; k < lda; k++)
+        sum += A[j*lda + k] * B[k*lda + c];
+      C[c + j*lda] = sum;
+    }
+  }
+}
+
+// Computes result columns [first, last): full strips first, then any leftover
+// columns through the scalar tail.
+static void matmul_columns(const int lda, const data_t A[], const data_t B[], data_t C[], const int first, const int last)
+{
+  int col = first;
+
+  for (; col + MATMUL_STRIP <= last; col += MATMUL_STRIP)
+    matmul_strip16(lda, A, B, C, col);
+  if (col < last)
+    matmul_tail(lda, A, B, C, col, last);
+}
+
+//--------------------------------------------------------------------------
+// Register-blocked matrix multiply, split by result columns across two cores.
+//
+//   coreid  index of the calling core
+//   ncores  number of cores taking part
+//   lda     dimension of the square matrices (row stride of A, B and C)
+//   A, B    inputs, lda*lda elements each, indexed [row*lda + col]
+//   C       output, lda*lda elements; every computed element is overwritten
+//
+// Core 0 produces columns [0, lda/2). Columns [lda/2, lda) are produced by
+// core 1, or by the same caller when ncores == 1. With more than one core,
+// callers whose coreid is neither 0 nor 1 do no work. The halves are disjoint,
+// so the two cores never store to the same element of C.
+//
+// The previous version hard-coded 32 for every extent and ignored lda as a
+// bound; for lda == 32 this version performs the same stores with the same
+// per-element accumulation order.
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+  const int half = lda / 2;
+
+  if (coreid == 0)
+    matmul_columns(lda, A, B, C, 0, half);
+
+  if (coreid == 1 || ncores == 1)
+    matmul_columns(lda, A, B, C, half, lda);
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
- int j, k, i;
- data_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
- data_t temp8, temp9, temp10, temp11, temp12, temp13, temp14, temp15;
- if(coreid == 0) {
- for(j = 0; j < 32; j++) {
- temp0 = 0; //C[j*lda];
- temp1 = 0; //C[1 + j*lda];
- temp2 = 0; //C[2 + j*lda];
- temp3 = 0; //C[3 + j*lda];
- temp4 = 0; //C[4 + j*lda];
- temp5 = 0; //C[5 + j*lda];
- temp6 = 0; //C[6 + j*lda];
- temp7 = 0; //C[7 + j*lda];
- temp8 = 0; //C[8 + j*lda];
- temp9 = 0; //C[9 + j*lda];
- temp10 = 0; //C[10 + j*lda];
- temp11 = 0; //C[11 + j*lda];
- temp12 = 0; //C[12 + j*lda];
- temp13 = 0; //C[13 + j*lda];
- temp14 = 0; //C[14 + j*lda];
- temp15 = 0; //C[15 + j*lda];
- for(k = 0; k < 32; k++) {
- temp0 += A[j*lda + k] * B[k*lda];
- temp1 += A[j*lda + k] * B[1+k*lda];
- temp2 += A[j*lda + k] * B[2+k*lda];
- temp3 += A[j*lda + k] * B[3+k*lda];
- temp4 += A[j*lda + k] * B[4+k*lda];
- temp5 += A[j*lda + k] * B[5+k*lda];
- temp6 += A[j*lda + k] * B[6+k*lda];
- temp7 += A[j*lda + k] * B[7+k*lda];
- temp8 += A[j*lda + k] * B[8+k*lda];
- temp9 += A[j*lda + k] * B[9+k*lda];
- temp10 += A[j*lda + k] * B[10+k*lda];
- temp11 += A[j*lda + k] * B[11+k*lda];
- temp12 += A[j*lda + k] * B[12+k*lda];
- temp13 += A[j*lda + k] * B[13+k*lda];
- temp14 += A[j*lda + k] * B[14+k*lda];
- temp15 += A[j*lda + k] * B[15+k*lda];
- }
- C[j*lda] = temp0;
- C[1 + j*lda] = temp1;
- C[2 + j*lda] = temp2;
- C[3 + j*lda] = temp3;
- C[4 + j*lda] = temp4;
- C[5 + j*lda] = temp5;
- C[6 + j*lda] = temp6;
- C[7 + j*lda] = temp7;
- C[8 + j*lda] = temp8;
- C[9 + j*lda] = temp9;
- C[10 + j*lda] = temp10;
- C[11 + j*lda] = temp11;
- C[12 + j*lda] = temp12;
- C[13 + j*lda] = temp13;
- C[14 + j*lda] = temp14;
- C[15 + j*lda] = temp15;
- }
- }
-
- else {
- for(j = 0; j < 32; j++) {
- temp0 = 0; //C[16+j*lda];
- temp1 = 0; //C[17+j*lda];
- temp2 = 0; //C[18+j*lda];
- temp3 = 0; //C[19+j*lda];
- temp4 = 0; //C[20+j*lda];
- temp5 = 0; //C[21+j*lda];
- temp6 = 0; //C[22+j*lda];
- temp7 = 0; //C[23+j*lda];
- temp8 = 0; //C[24+j*lda];
- temp9 = 0; //C[25+j*lda];
- temp10 = 0; //C[26+j*lda];
- temp11 = 0; //C[27+j*lda];
- temp12 = 0; //C[28+j*lda];
- temp13 = 0; //C[29+j*lda];
- temp14 = 0; //C[30+j*lda];
- temp15 = 0; //C[31+j*lda];
- for(k = 0; k < 32; k++) {
- temp0 += A[j*lda + k] * B[16+k*lda];
- temp1 += A[j*lda + k] * B[17+k*lda];
- temp2 += A[j*lda + k] * B[18+k*lda];
- temp3 += A[j*lda + k] * B[19+k*lda];
- temp4 += A[j*lda + k] * B[20+k*lda];
- temp5 += A[j*lda + k] * B[21+k*lda];
- temp6 += A[j*lda + k] * B[22+k*lda];
- temp7 += A[j*lda + k] * B[23+k*lda];
- temp8 += A[j*lda + k] * B[24+k*lda];
- temp9 += A[j*lda + k] * B[25+k*lda];
- temp10 += A[j*lda + k] * B[26+k*lda];
- temp11 += A[j*lda + k] * B[27+k*lda];
- temp12 += A[j*lda + k] * B[28+k*lda];
- temp13 += A[j*lda + k] * B[29+k*lda];
- temp14 += A[j*lda + k] * B[30+k*lda];
- temp15 += A[j*lda + k] * B[31+k*lda];
- }
- C[16 + j*lda] = temp0;
- C[17 + j*lda] = temp1;
- C[18 + j*lda] = temp2;
- C[19 + j*lda] = temp3;
- C[20 + j*lda] = temp4;
- C[21 + j*lda] = temp5;
- C[22 + j*lda] = temp6;
- C[23 + j*lda] = temp7;
- C[24 + j*lda] = temp8;
- C[25 + j*lda] = temp9;
- C[26 + j*lda] = temp10;
- C[27 + j*lda] = temp11;
- C[28 + j*lda] = temp12;
- C[29 + j*lda] = temp13;
- C[30 + j*lda] = temp14;
- C[31 + j*lda] = temp15;
- }
- }
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
- int j, k;
- data_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
- data_t temp8, temp9, temp10, temp11, temp12, temp13, temp14, temp15;
- if(coreid == 0) {
- //16*0:16*(0+1) ;; 16*1+16*(1+1)
- //0:16 ;; 16:32
-
- //complete Q1
- for(j = 0; j < 16; j++) {
- temp0 = C[j*lda];
- temp1 = C[1 + j*lda];
- temp2 = C[2 + j*lda];
- temp3 = C[3 + j*lda];
- temp4 = C[4 + j*lda];
- temp5 = C[5 + j*lda];
- temp6 = C[6 + j*lda];
- temp7 = C[7 + j*lda];
- temp8 = C[8 + j*lda];
- temp9 = C[9 + j*lda];
- temp10 = C[10 + j*lda];
- temp11 = C[11 + j*lda];
- temp12 = C[12 + j*lda];
- temp13 = C[13 + j*lda];
- temp14 = C[14 + j*lda];
- temp15 = C[15 + j*lda];
- for(k = 0; k < 32; k++) {
- temp0 += A[j*lda + k] * B[k*lda];
- temp1 += A[j*lda + k] * B[1+k*lda];
- temp2 += A[j*lda + k] * B[2+k*lda];
- temp3 += A[j*lda + k] * B[3+k*lda];
- temp4 += A[j*lda + k] * B[4+k*lda];
- temp5 += A[j*lda + k] * B[5+k*lda];
- temp6 += A[j*lda + k] * B[6+k*lda];
- temp7 += A[j*lda + k] * B[7+k*lda];
- temp8 += A[j*lda + k] * B[8+k*lda];
- temp9 += A[j*lda + k] * B[9+k*lda];
- temp10 += A[j*lda + k] * B[10+k*lda];
- temp11 += A[j*lda + k] * B[11+k*lda];
- temp12 += A[j*lda + k] * B[12+k*lda];
- temp13 += A[j*lda + k] * B[13+k*lda];
- temp14 += A[j*lda + k] * B[14+k*lda];
- temp15 += A[j*lda + k] * B[15+k*lda];
- }
- C[j*lda] = temp0;
- C[1 + j*lda] = temp1;
- C[2 + j*lda] = temp2;
- C[3 + j*lda] = temp3;
- C[4 + j*lda] = temp4;
- C[5 + j*lda] = temp5;
- C[6 + j*lda] = temp6;
- C[7 + j*lda] = temp7;
- C[8 + j*lda] = temp8;
- C[9 + j*lda] = temp9;
- C[10 + j*lda] = temp10;
- C[11 + j*lda] = temp11;
- C[12 + j*lda] = temp12;
- C[13 + j*lda] = temp13;
- C[14 + j*lda] = temp14;
- C[15 + j*lda] = temp15;
- }
- for(j = 16; j < 32; j++) {
- temp0 = C[j*lda];
- temp1 = C[1 + j*lda];
- temp2 = C[2 + j*lda];
- temp3 = C[3 + j*lda];
- temp4 = C[4 + j*lda];
- temp5 = C[5 + j*lda];
- temp6 = C[6 + j*lda];
- temp7 = C[7 + j*lda];
- temp8 = C[8 + j*lda];
- temp9 = C[9 + j*lda];
- temp10 = C[10 + j*lda];
- temp11 = C[11 + j*lda];
- temp12 = C[12 + j*lda];
- temp13 = C[13 + j*lda];
- temp14 = C[14 + j*lda];
- temp15 = C[15 + j*lda];
- for(k = 0; k < 32; k++) {
- temp0 += A[j*lda + k] * B[k*lda];
- temp1 += A[j*lda + k] * B[1+k*lda];
- temp2 += A[j*lda + k] * B[2+k*lda];
- temp3 += A[j*lda + k] * B[3+k*lda];
- temp4 += A[j*lda + k] * B[4+k*lda];
- temp5 += A[j*lda + k] * B[5+k*lda];
- temp6 += A[j*lda + k] * B[6+k*lda];
- temp7 += A[j*lda + k] * B[7+k*lda];
- temp8 += A[j*lda + k] * B[8+k*lda];
- temp9 += A[j*lda + k] * B[9+k*lda];
- temp10 += A[j*lda + k] * B[10+k*lda];
- temp11 += A[j*lda + k] * B[11+k*lda];
- temp12 += A[j*lda + k] * B[12+k*lda];
- temp13 += A[j*lda + k] * B[13+k*lda];
- temp14 += A[j*lda + k] * B[14+k*lda];
- temp15 += A[j*lda + k] * B[15+k*lda];
- }
- C[j*lda] = temp0;
- C[1 + j*lda] = temp1;
- C[2 + j*lda] = temp2;
- C[3 + j*lda] = temp3;
- C[4 + j*lda] = temp4;
- C[5 + j*lda] = temp5;
- C[6 + j*lda] = temp6;
- C[7 + j*lda] = temp7;
- C[8 + j*lda] = temp8;
- C[9 + j*lda] = temp9;
- C[10 + j*lda] = temp10;
- C[11 + j*lda] = temp11;
- C[12 + j*lda] = temp12;
- C[13 + j*lda] = temp13;
- C[14 + j*lda] = temp14;
- C[15 + j*lda] = temp15;
- }
- }
- //16*(2-1) : 16*2 ;; 16*(1-1) : 16*1
- //16:32 ;; 0:16
- if(coreid == 1) {
- //complete Q3
- for(j = 16; j < 32; j++) {
- temp0 = C[16+j*lda];
- temp1 = C[17+j*lda];
- temp2 = C[18+j*lda];
- temp3 = C[19+j*lda];
- temp4 = C[20+j*lda];
- temp5 = C[21+j*lda];
- temp6 = C[22+j*lda];
- temp7 = C[23+j*lda];
- temp8 = C[24+j*lda];
- temp9 = C[25+j*lda];
- temp10 = C[26+j*lda];
- temp11 = C[27+j*lda];
- temp12 = C[28+j*lda];
- temp13 = C[29+j*lda];
- temp14 = C[30+j*lda];
- temp15 = C[31+j*lda];
- for(k = 0; k < 32; k++) {
- temp0 += A[j*lda + k] * B[16+k*lda];
- temp1 += A[j*lda + k] * B[17+k*lda];
- temp2 += A[j*lda + k] * B[18+k*lda];
- temp3 += A[j*lda + k] * B[19+k*lda];
- temp4 += A[j*lda + k] * B[20+k*lda];
- temp5 += A[j*lda + k] * B[21+k*lda];
- temp6 += A[j*lda + k] * B[22+k*lda];
- temp7 += A[j*lda + k] * B[23+k*lda];
- temp8 += A[j*lda + k] * B[24+k*lda];
- temp9 += A[j*lda + k] * B[25+k*lda];
- temp10 += A[j*lda + k] * B[26+k*lda];
- temp11 += A[j*lda + k] * B[27+k*lda];
- temp12 += A[j*lda + k] * B[28+k*lda];
- temp13 += A[j*lda + k] * B[29+k*lda];
- temp14 += A[j*lda + k] * B[30+k*lda];
- temp15 += A[j*lda + k] * B[31+k*lda];
- }
- C[16 + j*lda] = temp0;
- C[17 + j*lda] = temp1;
- C[18 + j*lda] = temp2;
- C[19 + j*lda] = temp3;
- C[20 + j*lda] = temp4;
- C[21 + j*lda] = temp5;
- C[22 + j*lda] = temp6;
- C[23 + j*lda] = temp7;
- C[24 + j*lda] = temp8;
- C[25 + j*lda] = temp9;
- C[26 + j*lda] = temp10;
- C[27 + j*lda] = temp11;
- C[28 + j*lda] = temp12;
- C[29 + j*lda] = temp13;
- C[30 + j*lda] = temp14;
- C[31 + j*lda] = temp15;
- }
- //complete Q4
- for(j = 0; j < 16; j++) {
- temp0 = C[16 + j*lda];
- temp1 = C[17 + j*lda];
- temp2 = C[18 + j*lda];
- temp3 = C[19 + j*lda];
- temp4 = C[20 + j*lda];
- temp5 = C[21 + j*lda];
- temp6 = C[22 + j*lda];
- temp7 = C[23 + j*lda];
- temp8 = C[24 + j*lda];
- temp9 = C[25 + j*lda];
- temp10 = C[26 + j*lda];
- temp11 = C[27 + j*lda];
- temp12 = C[28 + j*lda];
- temp13 = C[29 + j*lda];
- temp14 = C[30 + j*lda];
- temp15 = C[31 + j*lda];
- for(k = 0; k < 32; k++) {
- temp0 += A[j*lda + k] * B[16 + k*lda];
- temp1 += A[j*lda + k] * B[17 + k*lda];
- temp2 += A[j*lda + k] * B[18 + k*lda];
- temp3 += A[j*lda + k] * B[19 + k*lda];
- temp4 += A[j*lda + k] * B[20 + k*lda];
- temp5 += A[j*lda + k] * B[21 + k*lda];
- temp6 += A[j*lda + k] * B[22 + k*lda];
- temp7 += A[j*lda + k] * B[23 + k*lda];
- temp8 += A[j*lda + k] * B[24 + k*lda];
- temp9 += A[j*lda + k] * B[25 + k*lda];
- temp10 += A[j*lda + k] * B[26 + k*lda];
- temp11 += A[j*lda + k] * B[27 + k*lda];
- temp12 += A[j*lda + k] * B[28 + k*lda];
- temp13 += A[j*lda + k] * B[29 + k*lda];
- temp14 += A[j*lda + k] * B[30 + k*lda];
- temp15 += A[j*lda + k] * B[31 + k*lda];
- }
- C[16 + j*lda] = temp0;
- C[17 + j*lda] = temp1;
- C[18 + j*lda] = temp2;
- C[19 + j*lda] = temp3;
- C[20 + j*lda] = temp4;
- C[21 + j*lda] = temp5;
- C[22 + j*lda] = temp6;
- C[23 + j*lda] = temp7;
- C[24 + j*lda] = temp8;
- C[25 + j*lda] = temp9;
- C[26 + j*lda] = temp10;
- C[27 + j*lda] = temp11;
- C[28 + j*lda] = temp12;
- C[29 + j*lda] = temp13;
- C[30 + j*lda] = temp14;
- C[31 + j*lda] = temp15;
- }
- }
-
-
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < n/2; i++)
- {
- x[i] = x[i] + y[i];
- }
- }
- if (coreid == 1)
- {
- for (i = n/2; i < n; i++)
- {
- x[i] = x[i] + y[i];
- }
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+ // Hand-unrolled matrix multiply shared between at most two cores.
+ // Elements are addressed as C[i + j*lda], A[j*lda + k] and B[k*lda + i].
+ // Products are added onto the values already stored in C (C is read, updated, written back).
+ // Core 0 handles rows j = 0 .. lda/2-1 and core 1 handles rows lda/2 .. lda-1;
+ // when ncores == 1, core 0 executes both halves one after the other.
+ // No remainder handling: columns are taken 8 at a time and k 2 at a time, so lda is expected to be a multiple of 8.
+ if(coreid > 1) return; // cores other than 0 and 1 do no work
+ // NOTE(review): C is presumably zeroed by the caller before the call - confirm.
+ int m, i, j, k, iB0, iB1;
+ data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7;
+ data_t tempA0, tempA1;
+
+ if (coreid == 0){ // first half of the rows, ascending traversal
+ for (m = 0; m < 2; m++){ // m selects which half of the k range is accumulated in this pass
+ for (j = 0; j < lda/2; j++){ // rows 0 .. lda/2-1
+ for (i = 0; i < lda; i+=8){ // i is the lowest column of an 8-wide group (i .. i+7)
+ tempC0 = C[i + j*lda]; // load the current partial sums of the group into locals
+ tempC1 = C[i + j*lda+1];
+ tempC2 = C[i + j*lda+2];
+ tempC3 = C[i + j*lda+3];
+ tempC4 = C[i + j*lda+4];
+ tempC5 = C[i + j*lda+5];
+ tempC6 = C[i + j*lda+6];
+ tempC7 = C[i + j*lda+7];
+ iB0 = m*lda*lda/2+i; // index of B[k*lda + i] for the first k of this pass (k = m*lda/2)
+ iB1 = iB0+lda; // same column, next row of B (k+1)
+ for (k = m*lda/2; k < (m+1)*lda/2; k+=2){ // two k values per iteration
+ tempA0 = A[j*lda+k];
+ tempA1 = A[j*lda+k+1];
+ tempC0 += tempA0*B[iB0]+tempA1*B[iB1];
+ tempC1 += tempA0*B[iB0+1]+tempA1*B[iB1+1];
+ tempC2 += tempA0*B[iB0+2]+tempA1*B[iB1+2];
+ tempC3 += tempA0*B[iB0+3]+tempA1*B[iB1+3];
+ tempC4 += tempA0*B[iB0+4]+tempA1*B[iB1+4];
+ tempC5 += tempA0*B[iB0+5]+tempA1*B[iB1+5];
+ tempC6 += tempA0*B[iB0+6]+tempA1*B[iB1+6];
+ tempC7 += tempA0*B[iB0+7]+tempA1*B[iB1+7];
+ iB0 += 2*lda; // step both B indices down two rows to follow k += 2
+ iB1 += 2*lda;
+
+ }
+ C[i + j*lda] = tempC0; // write the updated partial sums back
+ C[i + j*lda + 1] = tempC1;
+ C[i + j*lda + 2] = tempC2;
+ C[i + j*lda + 3] = tempC3;
+ C[i + j*lda + 4] = tempC4;
+ C[i + j*lda + 5] = tempC5;
+ C[i + j*lda + 6] = tempC6;
+ C[i + j*lda + 7] = tempC7;
+ }
+ }
+ }
+ }
+ if(coreid == 1 || ncores == 1) { // second half of the rows; also taken by core 0 when it is the only core
+ for (m = 2; m > 0; m--){ // k halves visited upper half first; NOTE(review): descending order presumably keeps the two cores on different data at the same time - confirm
+ for (j = lda-1; j >= lda/2; j--){ // rows lda-1 down to lda/2
+ for (i = lda-1; i >= 0; i-=8){ // i is the highest column of an 8-wide group (i-7 .. i)
+ tempC0 = C[i + j*lda]; // load the current partial sums of the group into locals
+ tempC1 = C[i + j*lda - 1];
+ tempC2 = C[i + j*lda - 2];
+ tempC3 = C[i + j*lda - 3];
+ tempC4 = C[i + j*lda - 4];
+ tempC5 = C[i + j*lda - 5];
+ tempC6 = C[i + j*lda - 6];
+ tempC7 = C[i + j*lda - 7];
+ for (k = m*lda/2-1; k >= (m-1)*lda/2; k-=2){ // k and k-1 per iteration, descending; B is indexed directly here
+ tempA0 = A[j*lda+k];
+ tempA1 = A[j*lda+k-1];
+ tempC0 += tempA0*B[k*lda+i]+tempA1*B[(k-1)*lda+i];
+ tempC1 += tempA0*B[k*lda+i-1]+tempA1*B[(k-1)*lda+i-1];
+ tempC2 += tempA0*B[k*lda+i-2]+tempA1*B[(k-1)*lda+i-2];
+ tempC3 += tempA0*B[k*lda+i-3]+tempA1*B[(k-1)*lda+i-3];
+ tempC4 += tempA0*B[k*lda+i-4]+tempA1*B[(k-1)*lda+i-4];
+ tempC5 += tempA0*B[k*lda+i-5]+tempA1*B[(k-1)*lda+i-5];
+ tempC6 += tempA0*B[k*lda+i-6]+tempA1*B[(k-1)*lda+i-6];
+ tempC7 += tempA0*B[k*lda+i-7]+tempA1*B[(k-1)*lda+i-7];
+ }
+ C[i + j*lda] = tempC0; // write the updated partial sums back
+ C[i + j*lda - 1] = tempC1;
+ C[i + j*lda - 2] = tempC2;
+ C[i + j*lda - 3] = tempC3;
+ C[i + j*lda - 4] = tempC4;
+ C[i + j*lda - 5] = tempC5;
+ C[i + j*lda - 6] = tempC6;
+ C[i + j*lda - 7] = tempC7;
+ }
+ }
+ }
+ }
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
- int m, i, j, k, iB0, iB1;
- data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7;
- data_t tempA0, tempA1;
-
- if (coreid == 0){
- for (m = 0; m < 2; m++){
- for (j = 0; j < lda/2; j++){
- for (i = 0; i < lda; i+=8){
- tempC0 = C[i + j*lda];
- tempC1 = C[i + j*lda+1];
- tempC2 = C[i + j*lda+2];
- tempC3 = C[i + j*lda+3];
- tempC4 = C[i + j*lda+4];
- tempC5 = C[i + j*lda+5];
- tempC6 = C[i + j*lda+6];
- tempC7 = C[i + j*lda+7];
- iB0 = m*lda*lda/2+i;
- iB1 = iB0+lda;
- for (k = m*lda/2; k < (m+1)*lda/2; k+=2){
- tempA0 = A[j*lda+k];
- tempA1 = A[j*lda+k+1];
- tempC0 += tempA0*B[iB0]+tempA1*B[iB1];
- tempC1 += tempA0*B[iB0+1]+tempA1*B[iB1+1];
- tempC2 += tempA0*B[iB0+2]+tempA1*B[iB1+2];
- tempC3 += tempA0*B[iB0+3]+tempA1*B[iB1+3];
- tempC4 += tempA0*B[iB0+4]+tempA1*B[iB1+4];
- tempC5 += tempA0*B[iB0+5]+tempA1*B[iB1+5];
- tempC6 += tempA0*B[iB0+6]+tempA1*B[iB1+6];
- tempC7 += tempA0*B[iB0+7]+tempA1*B[iB1+7];
- iB0 += 2*lda;
- iB1 += 2*lda;
-
- }
- C[i + j*lda] = tempC0;
- C[i + j*lda + 1] = tempC1;
- C[i + j*lda + 2] = tempC2;
- C[i + j*lda + 3] = tempC3;
- C[i + j*lda + 4] = tempC4;
- C[i + j*lda + 5] = tempC5;
- C[i + j*lda + 6] = tempC6;
- C[i + j*lda + 7] = tempC7;
- }
- }
- }
- } else {
- for (m = 2; m > 0; m--){
- for (j = lda-1; j >= lda/2; j--){
- for (i = lda-1; i >= 0; i-=8){
- tempC0 = C[i + j*lda];
- tempC1 = C[i + j*lda - 1];
- tempC2 = C[i + j*lda - 2];
- tempC3 = C[i + j*lda - 3];
- tempC4 = C[i + j*lda - 4];
- tempC5 = C[i + j*lda - 5];
- tempC6 = C[i + j*lda - 6];
- tempC7 = C[i + j*lda - 7];
- for (k = m*lda/2-1; k >= (m-1)*lda/2; k-=2){
- tempA0 = A[j*lda+k];
- tempA1 = A[j*lda+k-1];
- tempC0 += tempA0*B[k*lda+i]+tempA1*B[(k-1)*lda+i];
- tempC1 += tempA0*B[k*lda+i-1]+tempA1*B[(k-1)*lda+i-1];
- tempC2 += tempA0*B[k*lda+i-2]+tempA1*B[(k-1)*lda+i-2];
- tempC3 += tempA0*B[k*lda+i-3]+tempA1*B[(k-1)*lda+i-3];
- tempC4 += tempA0*B[k*lda+i-4]+tempA1*B[(k-1)*lda+i-4];
- tempC5 += tempA0*B[k*lda+i-5]+tempA1*B[(k-1)*lda+i-5];
- tempC6 += tempA0*B[k*lda+i-6]+tempA1*B[(k-1)*lda+i-6];
- tempC7 += tempA0*B[k*lda+i-7]+tempA1*B[(k-1)*lda+i-7];
- }
- C[i + j*lda] = tempC0;
- C[i + j*lda - 1] = tempC1;
- C[i + j*lda - 2] = tempC2;
- C[i + j*lda - 3] = tempC3;
- C[i + j*lda - 4] = tempC4;
- C[i + j*lda - 5] = tempC5;
- C[i + j*lda - 6] = tempC6;
- C[i + j*lda - 7] = tempC7;
- }
- }
- }
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
- int m, i, j, k, iB0, iB1;
- data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7;
- data_t tempA0, tempA1;
-
- if (coreid == 0){
- for (m = 0; m < 2; m++){
- for (j = 0; j < lda/2; j++){
- for (i = 0; i < lda; i+=8){
- tempC0 = C[i + j*lda];
- tempC1 = C[i + j*lda+1];
- tempC2 = C[i + j*lda+2];
- tempC3 = C[i + j*lda+3];
- tempC4 = C[i + j*lda+4];
- tempC5 = C[i + j*lda+5];
- tempC6 = C[i + j*lda+6];
- tempC7 = C[i + j*lda+7];
- iB0 = m*lda*lda/2+i;
- iB1 = iB0+lda;
- for (k = m*lda/2; k < (m+1)*lda/2; k+=2){
- tempA0 = A[j*lda+k];
- tempA1 = A[j*lda+k+1];
- tempC0 += tempA0*B[iB0]+tempA1*B[iB1];
- tempC1 += tempA0*B[iB0+1]+tempA1*B[iB1+1];
- tempC2 += tempA0*B[iB0+2]+tempA1*B[iB1+2];
- tempC3 += tempA0*B[iB0+3]+tempA1*B[iB1+3];
- tempC4 += tempA0*B[iB0+4]+tempA1*B[iB1+4];
- tempC5 += tempA0*B[iB0+5]+tempA1*B[iB1+5];
- tempC6 += tempA0*B[iB0+6]+tempA1*B[iB1+6];
- tempC7 += tempA0*B[iB0+7]+tempA1*B[iB1+7];
- iB0 += 2*lda;
- iB1 += 2*lda;
-
- }
- C[i + j*lda] = tempC0;
- C[i + j*lda + 1] = tempC1;
- C[i + j*lda + 2] = tempC2;
- C[i + j*lda + 3] = tempC3;
- C[i + j*lda + 4] = tempC4;
- C[i + j*lda + 5] = tempC5;
- C[i + j*lda + 6] = tempC6;
- C[i + j*lda + 7] = tempC7;
- }
- }
- }
- } else {
- for (m = 2; m > 0; m--){
- for (j = lda-1; j >= lda/2; j--){
- for (i = lda-1; i >= 0; i-=8){
- tempC0 = C[i + j*lda];
- tempC1 = C[i + j*lda - 1];
- tempC2 = C[i + j*lda - 2];
- tempC3 = C[i + j*lda - 3];
- tempC4 = C[i + j*lda - 4];
- tempC5 = C[i + j*lda - 5];
- tempC6 = C[i + j*lda - 6];
- tempC7 = C[i + j*lda - 7];
- for (k = m*lda/2-1; k >= (m-1)*lda/2; k-=2){
- tempA0 = A[j*lda+k];
- tempA1 = A[j*lda+k-1];
- tempC0 += tempA0*B[k*lda+i]+tempA1*B[(k-1)*lda+i];
- tempC1 += tempA0*B[k*lda+i-1]+tempA1*B[(k-1)*lda+i-1];
- tempC2 += tempA0*B[k*lda+i-2]+tempA1*B[(k-1)*lda+i-2];
- tempC3 += tempA0*B[k*lda+i-3]+tempA1*B[(k-1)*lda+i-3];
- tempC4 += tempA0*B[k*lda+i-4]+tempA1*B[(k-1)*lda+i-4];
- tempC5 += tempA0*B[k*lda+i-5]+tempA1*B[(k-1)*lda+i-5];
- tempC6 += tempA0*B[k*lda+i-6]+tempA1*B[(k-1)*lda+i-6];
- tempC7 += tempA0*B[k*lda+i-7]+tempA1*B[(k-1)*lda+i-7];
- }
- C[i + j*lda] = tempC0;
- C[i + j*lda - 1] = tempC1;
- C[i + j*lda - 2] = tempC2;
- C[i + j*lda - 3] = tempC3;
- C[i + j*lda - 4] = tempC4;
- C[i + j*lda - 5] = tempC5;
- C[i + j*lda - 6] = tempC6;
- C[i + j*lda - 7] = tempC7;
- }
- }
- }
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- size_t i;
- size_t m;
- size_t r;
-
- m = n >> 1;
- r = n - 2 * m; //parity check
-
- if (coreid == 0) {
- // printf("Completed number rounding %ld", m);
- }
- for (i = 0; i < m; i = i + 1)
- {
- if (coreid == 0) {
- x[i] = x[i] + y[i];
- } else {
- x[n-1-i] = x[n-1-i] + y[n-1-i];
- }
- }
- //strip the last element if odd
- if (r == 1) {
- x[m] = x[m] + y[m];
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"\r
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )\r
+{\r
+ \r
+ // Two-core matrix multiply: for every (j, i) it adds sum over k of A[j*lda+k]*B[k*lda+i] into C[i + j*lda].\r
+ // Core 0 handles j in [0, lda/2) walking upward; core 1 handles j in [lda/2, lda) walking downward.\r
+ // With ncores == 1 the single core runs both halves; any coreid > 1 returns immediately.\r
+ // C is read before it is written back, so the products accumulate onto whatever C already holds.\r
+ // NOTE(review): i steps by 8 and k by 2 with no remainder loop - presumably lda is a multiple of 8; confirm with callers.\r
+ \r
+ int m, i, j, k, iB0, iB1;\r
+ data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7;\r
+ data_t tempA0, tempA1;\r
+ \r
+ if(coreid > 1) return; /* cores beyond the first two do no work */
+ if (coreid == 0){ /* core 0: j = 0 .. lda/2-1, with i and k increasing */\r
+ for (m = 0; m < 2; m++){ /* m selects which half of the k range this pass accumulates */\r
+ for (j = 0; j < lda/2; j++){\r
+ for (i = 0; i < lda; i+=8){ /* eight consecutive C entries i .. i+7 are kept in locals */\r
+ tempC0 = C[i + j*lda];\r
+ tempC1 = C[i + j*lda+1];\r
+ tempC2 = C[i + j*lda+2];\r
+ tempC3 = C[i + j*lda+3];\r
+ tempC4 = C[i + j*lda+4];\r
+ tempC5 = C[i + j*lda+5];\r
+ tempC6 = C[i + j*lda+6];\r
+ tempC7 = C[i + j*lda+7];\r
+ iB0 = m*lda*lda/2+i; /* equals k*lda + i at the first k of this half (k = m*lda/2) when lda is even */\r
+ iB1 = iB0+lda; /* same i, one k further on: (k+1)*lda + i */\r
+ for (k = m*lda/2; k < (m+1)*lda/2; k+=2){\r
+ tempA0 = A[j*lda+k];\r
+ tempA1 = A[j*lda+k+1];\r
+ tempC0 += tempA0*B[iB0]+tempA1*B[iB1];\r
+ tempC1 += tempA0*B[iB0+1]+tempA1*B[iB1+1];\r
+ tempC2 += tempA0*B[iB0+2]+tempA1*B[iB1+2];\r
+ tempC3 += tempA0*B[iB0+3]+tempA1*B[iB1+3];\r
+ tempC4 += tempA0*B[iB0+4]+tempA1*B[iB1+4];\r
+ tempC5 += tempA0*B[iB0+5]+tempA1*B[iB1+5];\r
+ tempC6 += tempA0*B[iB0+6]+tempA1*B[iB1+6];\r
+ tempC7 += tempA0*B[iB0+7]+tempA1*B[iB1+7];\r
+ iB0 += 2*lda; /* k advances by 2, so both B indices move by two strides of lda */\r
+ iB1 += 2*lda;\r
+ \r
+ }\r
+ C[i + j*lda] = tempC0;\r
+ C[i + j*lda + 1] = tempC1;\r
+ C[i + j*lda + 2] = tempC2;\r
+ C[i + j*lda + 3] = tempC3;\r
+ C[i + j*lda + 4] = tempC4;\r
+ C[i + j*lda + 5] = tempC5;\r
+ C[i + j*lda + 6] = tempC6;\r
+ C[i + j*lda + 7] = tempC7;\r
+ }\r
+ }\r
+ }\r
+ }
+ if(coreid == 1 || ncores == 1) { /* core 1, or the only core when ncores == 1: j = lda-1 .. lda/2, all indices decreasing */\r
+ for (m = 2; m > 0; m--){ /* m = 2 covers the upper half of the k range, m = 1 the lower half */\r
+ for (j = lda-1; j >= lda/2; j--){\r
+ for (i = lda-1; i >= 0; i-=8){ /* i is the highest of eight entries; i-1 .. i-7 are handled with it */\r
+ tempC0 = C[i + j*lda];\r
+ tempC1 = C[i + j*lda - 1];\r
+ tempC2 = C[i + j*lda - 2];\r
+ tempC3 = C[i + j*lda - 3];\r
+ tempC4 = C[i + j*lda - 4];\r
+ tempC5 = C[i + j*lda - 5];\r
+ tempC6 = C[i + j*lda - 6];\r
+ tempC7 = C[i + j*lda - 7];\r
+ for (k = m*lda/2-1; k >= (m-1)*lda/2; k-=2){ /* each step consumes k and k-1 */\r
+ tempA0 = A[j*lda+k];\r
+ tempA1 = A[j*lda+k-1];\r
+ tempC0 += tempA0*B[k*lda+i]+tempA1*B[(k-1)*lda+i];\r
+ tempC1 += tempA0*B[k*lda+i-1]+tempA1*B[(k-1)*lda+i-1];\r
+ tempC2 += tempA0*B[k*lda+i-2]+tempA1*B[(k-1)*lda+i-2];\r
+ tempC3 += tempA0*B[k*lda+i-3]+tempA1*B[(k-1)*lda+i-3];\r
+ tempC4 += tempA0*B[k*lda+i-4]+tempA1*B[(k-1)*lda+i-4];\r
+ tempC5 += tempA0*B[k*lda+i-5]+tempA1*B[(k-1)*lda+i-5];\r
+ tempC6 += tempA0*B[k*lda+i-6]+tempA1*B[(k-1)*lda+i-6];\r
+ tempC7 += tempA0*B[k*lda+i-7]+tempA1*B[(k-1)*lda+i-7];\r
+ }\r
+ C[i + j*lda] = tempC0;\r
+ C[i + j*lda - 1] = tempC1;\r
+ C[i + j*lda - 2] = tempC2;\r
+ C[i + j*lda - 3] = tempC3;\r
+ C[i + j*lda - 4] = tempC4;\r
+ C[i + j*lda - 5] = tempC5;\r
+ C[i + j*lda - 6] = tempC6;\r
+ C[i + j*lda - 7] = tempC7;\r
+ }\r
+ }\r
+ }\r
+ }\r
+}\r
+++ /dev/null
-//**************************************************************************\r
-// Multi-threaded Matrix Multiply benchmark\r
-//--------------------------------------------------------------------------\r
-// TA : Christopher Celio\r
-// Student:\r
-//\r
-//\r
-// This benchmark multiplies two 2-D arrays together and writes the results to\r
-// a third vector. The input data (and reference data) should be generated\r
-// using the matmul_gendata.pl perl script and dumped to a file named\r
-// dataset.h.\r
-\r
-\r
-// print out arrays, etc.\r
-//#define DEBUG\r
-\r
-//--------------------------------------------------------------------------\r
-// Includes\r
-\r
-#include <string.h>\r
-#include <stdlib.h>\r
-#include <stdio.h>\r
-\r
-\r
-//--------------------------------------------------------------------------\r
-// Input/Reference Data\r
-\r
-typedef float data_t;\r
-#include "dataset.h"\r
-\r
-\r
-//--------------------------------------------------------------------------\r
-// Basic Utilities and Multi-thread Support\r
-\r
-__thread unsigned long coreid;\r
-unsigned long ncores;\r
-\r
-#include "util.h"\r
-\r
-#define stringify_1(s) #s\r
-#define stringify(s) stringify_1(s)\r
-#define stats(code) do { \\r
-unsigned long _c = -rdcycle(), _i = -rdinstret(); \\r
-code; \\r
-_c += rdcycle(), _i += rdinstret(); \\r
-if (coreid == 0) \\r
-printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \\r
-stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \\r
-} while(0)\r
-\r
-\r
-//--------------------------------------------------------------------------\r
-// Helper functions\r
-\r
-void printArrayMT( char name[], int n, data_t arr[] )\r
-{\r
- int i;\r
- if (coreid != 0)\r
- return;\r
- \r
- printf( " %10s :", name );\r
- for ( i = 0; i < n; i++ )\r
- printf( " %3ld ", (long) arr[i] );\r
- printf( "\n" );\r
-}\r
-\r
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)\r
-{\r
- if (coreid != 0)\r
- return;\r
- \r
- size_t i;\r
- for (i = 0; i < n; i++)\r
- {\r
- if (test[i] != correct[i])\r
- {\r
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",\r
- i, (long)test[i], i, (long)correct[i]);\r
- exit(-1);\r
- }\r
- }\r
- \r
- return;\r
-}\r
-\r
-//--------------------------------------------------------------------------\r
-// matmul function\r
-\r
-// single-thread, naive version\r
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )\r
-{\r
- \r
- int i, j, k;\r
- \r
- if (coreid > 0)\r
- return;\r
- \r
- for ( i = 0; i < lda; i++ )\r
- for ( j = 0; j < lda; j++ )\r
- {\r
- for ( k = 0; k < lda; k++ )\r
- {\r
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];\r
- }\r
- }\r
- \r
-}\r
-\r
-\r
-\r
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )\r
-{\r
- \r
- // ***************************** //\r
- // **** ADD YOUR CODE HERE ***** //\r
- // ***************************** //\r
- //\r
- // feel free to make a separate function for MI and MSI versions.\r
- \r
- int m, i, j, k, iB0, iB1;\r
- data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7;\r
- data_t tempA0, tempA1;\r
- \r
- if (coreid == 0){\r
- for (m = 0; m < 2; m++){\r
- for (j = 0; j < lda/2; j++){\r
- for (i = 0; i < lda; i+=8){\r
- tempC0 = C[i + j*lda];\r
- tempC1 = C[i + j*lda+1];\r
- tempC2 = C[i + j*lda+2];\r
- tempC3 = C[i + j*lda+3];\r
- tempC4 = C[i + j*lda+4];\r
- tempC5 = C[i + j*lda+5];\r
- tempC6 = C[i + j*lda+6];\r
- tempC7 = C[i + j*lda+7];\r
- iB0 = m*lda*lda/2+i;\r
- iB1 = iB0+lda;\r
- for (k = m*lda/2; k < (m+1)*lda/2; k+=2){\r
- tempA0 = A[j*lda+k];\r
- tempA1 = A[j*lda+k+1];\r
- tempC0 += tempA0*B[iB0]+tempA1*B[iB1];\r
- tempC1 += tempA0*B[iB0+1]+tempA1*B[iB1+1];\r
- tempC2 += tempA0*B[iB0+2]+tempA1*B[iB1+2];\r
- tempC3 += tempA0*B[iB0+3]+tempA1*B[iB1+3];\r
- tempC4 += tempA0*B[iB0+4]+tempA1*B[iB1+4];\r
- tempC5 += tempA0*B[iB0+5]+tempA1*B[iB1+5];\r
- tempC6 += tempA0*B[iB0+6]+tempA1*B[iB1+6];\r
- tempC7 += tempA0*B[iB0+7]+tempA1*B[iB1+7];\r
- iB0 += 2*lda;\r
- iB1 += 2*lda;\r
- \r
- }\r
- C[i + j*lda] = tempC0;\r
- C[i + j*lda + 1] = tempC1;\r
- C[i + j*lda + 2] = tempC2;\r
- C[i + j*lda + 3] = tempC3;\r
- C[i + j*lda + 4] = tempC4;\r
- C[i + j*lda + 5] = tempC5;\r
- C[i + j*lda + 6] = tempC6;\r
- C[i + j*lda + 7] = tempC7;\r
- }\r
- }\r
- }\r
- } else {\r
- for (m = 2; m > 0; m--){\r
- for (j = lda-1; j >= lda/2; j--){\r
- for (i = lda-1; i >= 0; i-=8){\r
- tempC0 = C[i + j*lda];\r
- tempC1 = C[i + j*lda - 1];\r
- tempC2 = C[i + j*lda - 2];\r
- tempC3 = C[i + j*lda - 3];\r
- tempC4 = C[i + j*lda - 4];\r
- tempC5 = C[i + j*lda - 5];\r
- tempC6 = C[i + j*lda - 6];\r
- tempC7 = C[i + j*lda - 7];\r
- for (k = m*lda/2-1; k >= (m-1)*lda/2; k-=2){\r
- tempA0 = A[j*lda+k];\r
- tempA1 = A[j*lda+k-1];\r
- tempC0 += tempA0*B[k*lda+i]+tempA1*B[(k-1)*lda+i];\r
- tempC1 += tempA0*B[k*lda+i-1]+tempA1*B[(k-1)*lda+i-1];\r
- tempC2 += tempA0*B[k*lda+i-2]+tempA1*B[(k-1)*lda+i-2];\r
- tempC3 += tempA0*B[k*lda+i-3]+tempA1*B[(k-1)*lda+i-3];\r
- tempC4 += tempA0*B[k*lda+i-4]+tempA1*B[(k-1)*lda+i-4];\r
- tempC5 += tempA0*B[k*lda+i-5]+tempA1*B[(k-1)*lda+i-5];\r
- tempC6 += tempA0*B[k*lda+i-6]+tempA1*B[(k-1)*lda+i-6];\r
- tempC7 += tempA0*B[k*lda+i-7]+tempA1*B[(k-1)*lda+i-7];\r
- }\r
- C[i + j*lda] = tempC0;\r
- C[i + j*lda - 1] = tempC1;\r
- C[i + j*lda - 2] = tempC2;\r
- C[i + j*lda - 3] = tempC3;\r
- C[i + j*lda - 4] = tempC4;\r
- C[i + j*lda - 5] = tempC5;\r
- C[i + j*lda - 6] = tempC6;\r
- C[i + j*lda - 7] = tempC7;\r
- }\r
- }\r
- }\r
- }\r
-}\r
-\r
-//--------------------------------------------------------------------------\r
-// Main\r
-//\r
-// all threads start executing thread_entry(). Use their "coreid" to\r
-// differentiate between threads (each thread is running on a separate core).\r
-\r
-void thread_entry(int cid, int nc)\r
-{\r
- coreid = cid;\r
- ncores = nc;\r
- \r
- // static allocates data in the binary, which is visible to both threads\r
- static data_t results_data[ARRAY_SIZE];\r
- \r
- \r
-// // Execute the provided, naive matmul\r
-// barrier(nc);\r
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));\r
-// \r
-// \r
-// // verify\r
-// verifyMT(ARRAY_SIZE, results_data, verify_data);\r
-// \r
-// // clear results from the first trial\r
-// size_t i;\r
-// if (coreid == 0)\r
-// for (i=0; i < ARRAY_SIZE; i++)\r
-// results_data[i] = 0;\r
-// barrier(nc);\r
- \r
- \r
- // Execute your faster matmul\r
- barrier(nc);\r
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));\r
- \r
-#ifdef DEBUG\r
- printArrayMT("results:", ARRAY_SIZE, results_data);\r
- printArrayMT("verify :", ARRAY_SIZE, verify_data);\r
-#endif\r
- \r
- // verify\r
- verifyMT(ARRAY_SIZE, results_data, verify_data);\r
- barrier(nc);\r
- \r
- exit(0);\r
-}\r
-\r
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************\r
-// Multi-threaded Matrix Multiply benchmark\r
-//--------------------------------------------------------------------------\r
-// TA : Christopher Celio\r
-// Student:\r
-//\r
-//\r
-// This benchmark multiplies two 2-D arrays together and writes the results to\r
-// a third vector. The input data (and reference data) should be generated\r
-// using the matmul_gendata.pl perl script and dumped to a file named\r
-// dataset.h.\r
-\r
-\r
-// print out arrays, etc.\r
-//#define DEBUG\r
-\r
-//--------------------------------------------------------------------------\r
-// Includes\r
-\r
-#include <string.h>\r
-#include <stdlib.h>\r
-#include <stdio.h>\r
-\r
-\r
-//--------------------------------------------------------------------------\r
-// Input/Reference Data\r
-\r
-typedef float data_t;\r
-#include "dataset.h"\r
-\r
-\r
-//--------------------------------------------------------------------------\r
-// Basic Utilities and Multi-thread Support\r
-\r
-__thread unsigned long coreid;\r
-unsigned long ncores;\r
-\r
-#include "util.h"\r
-\r
-#define stringify_1(s) #s\r
-#define stringify(s) stringify_1(s)\r
-#define stats(code) do { \\r
-unsigned long _c = -rdcycle(), _i = -rdinstret(); \\r
-code; \\r
-_c += rdcycle(), _i += rdinstret(); \\r
-if (coreid == 0) \\r
-printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \\r
-stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \\r
-} while(0)\r
-\r
-\r
-//--------------------------------------------------------------------------\r
-// Helper functions\r
-\r
-void printArrayMT( char name[], int n, data_t arr[] )\r
-{\r
- int i;\r
- if (coreid != 0)\r
- return;\r
- \r
- printf( " %10s :", name );\r
- for ( i = 0; i < n; i++ )\r
- printf( " %3ld ", (long) arr[i] );\r
- printf( "\n" );\r
-}\r
-\r
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)\r
-{\r
- if (coreid != 0)\r
- return;\r
- \r
- size_t i;\r
- for (i = 0; i < n; i++)\r
- {\r
- if (test[i] != correct[i])\r
- {\r
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",\r
- i, (long)test[i], i, (long)correct[i]);\r
- exit(-1);\r
- }\r
- }\r
- \r
- return;\r
-}\r
-\r
-//--------------------------------------------------------------------------\r
-// matmul function\r
-\r
-// single-thread, naive version\r
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )\r
-{\r
- \r
- int i, j, k;\r
- \r
- if (coreid > 0)\r
- return;\r
- \r
- for ( i = 0; i < lda; i++ )\r
- for ( j = 0; j < lda; j++ )\r
- {\r
- for ( k = 0; k < lda; k++ )\r
- {\r
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];\r
- }\r
- }\r
- \r
-}\r
-\r
-\r
-\r
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )\r
-{\r
- \r
- // ***************************** //\r
- // **** ADD YOUR CODE HERE ***** //\r
- // ***************************** //\r
- //\r
- // feel free to make a separate function for MI and MSI versions.\r
- \r
- int m, i, j, k, iB0, iB1;\r
- data_t tempC0, tempC1, tempC2, tempC3, tempC4, tempC5, tempC6, tempC7;\r
- data_t tempA0, tempA1;\r
- \r
- if (coreid == 0){\r
- for (m = 0; m < 2; m++){\r
- for (j = 0; j < lda/2; j++){\r
- for (i = 0; i < lda; i+=8){\r
- tempC0 = C[i + j*lda];\r
- tempC1 = C[i + j*lda+1];\r
- tempC2 = C[i + j*lda+2];\r
- tempC3 = C[i + j*lda+3];\r
- tempC4 = C[i + j*lda+4];\r
- tempC5 = C[i + j*lda+5];\r
- tempC6 = C[i + j*lda+6];\r
- tempC7 = C[i + j*lda+7];\r
- iB0 = m*lda*lda/2+i;\r
- iB1 = iB0+lda;\r
- for (k = m*lda/2; k < (m+1)*lda/2; k+=2){\r
- tempA0 = A[j*lda+k];\r
- tempA1 = A[j*lda+k+1];\r
- tempC0 += tempA0*B[iB0]+tempA1*B[iB1];\r
- tempC1 += tempA0*B[iB0+1]+tempA1*B[iB1+1];\r
- tempC2 += tempA0*B[iB0+2]+tempA1*B[iB1+2];\r
- tempC3 += tempA0*B[iB0+3]+tempA1*B[iB1+3];\r
- tempC4 += tempA0*B[iB0+4]+tempA1*B[iB1+4];\r
- tempC5 += tempA0*B[iB0+5]+tempA1*B[iB1+5];\r
- tempC6 += tempA0*B[iB0+6]+tempA1*B[iB1+6];\r
- tempC7 += tempA0*B[iB0+7]+tempA1*B[iB1+7];\r
- iB0 += 2*lda;\r
- iB1 += 2*lda;\r
- \r
- }\r
- C[i + j*lda] = tempC0;\r
- C[i + j*lda + 1] = tempC1;\r
- C[i + j*lda + 2] = tempC2;\r
- C[i + j*lda + 3] = tempC3;\r
- C[i + j*lda + 4] = tempC4;\r
- C[i + j*lda + 5] = tempC5;\r
- C[i + j*lda + 6] = tempC6;\r
- C[i + j*lda + 7] = tempC7;\r
- }\r
- }\r
- }\r
- } else {\r
- for (m = 2; m > 0; m--){\r
- for (j = lda-1; j >= lda/2; j--){\r
- for (i = lda-1; i >= 0; i-=8){\r
- tempC0 = C[i + j*lda];\r
- tempC1 = C[i + j*lda - 1];\r
- tempC2 = C[i + j*lda - 2];\r
- tempC3 = C[i + j*lda - 3];\r
- tempC4 = C[i + j*lda - 4];\r
- tempC5 = C[i + j*lda - 5];\r
- tempC6 = C[i + j*lda - 6];\r
- tempC7 = C[i + j*lda - 7];\r
- for (k = m*lda/2-1; k >= (m-1)*lda/2; k-=2){\r
- tempA0 = A[j*lda+k];\r
- tempA1 = A[j*lda+k-1];\r
- tempC0 += tempA0*B[k*lda+i]+tempA1*B[(k-1)*lda+i];\r
- tempC1 += tempA0*B[k*lda+i-1]+tempA1*B[(k-1)*lda+i-1];\r
- tempC2 += tempA0*B[k*lda+i-2]+tempA1*B[(k-1)*lda+i-2];\r
- tempC3 += tempA0*B[k*lda+i-3]+tempA1*B[(k-1)*lda+i-3];\r
- tempC4 += tempA0*B[k*lda+i-4]+tempA1*B[(k-1)*lda+i-4];\r
- tempC5 += tempA0*B[k*lda+i-5]+tempA1*B[(k-1)*lda+i-5];\r
- tempC6 += tempA0*B[k*lda+i-6]+tempA1*B[(k-1)*lda+i-6];\r
- tempC7 += tempA0*B[k*lda+i-7]+tempA1*B[(k-1)*lda+i-7];\r
- }\r
- C[i + j*lda] = tempC0;\r
- C[i + j*lda - 1] = tempC1;\r
- C[i + j*lda - 2] = tempC2;\r
- C[i + j*lda - 3] = tempC3;\r
- C[i + j*lda - 4] = tempC4;\r
- C[i + j*lda - 5] = tempC5;\r
- C[i + j*lda - 6] = tempC6;\r
- C[i + j*lda - 7] = tempC7;\r
- }\r
- }\r
- }\r
- }\r
-}\r
-\r
-//--------------------------------------------------------------------------\r
-// Main\r
-//\r
-// all threads start executing thread_entry(). Use their "coreid" to\r
-// differentiate between threads (each thread is running on a separate core).\r
-\r
-void thread_entry(int cid, int nc)\r
-{\r
- coreid = cid;\r
- ncores = nc;\r
- \r
- // static allocates data in the binary, which is visible to both threads\r
- static data_t results_data[ARRAY_SIZE];\r
- \r
- \r
-// // Execute the provided, naive matmul\r
-// barrier(nc);\r
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));\r
-// \r
-// \r
-// // verify\r
-// verifyMT(ARRAY_SIZE, results_data, verify_data);\r
-// \r
-// // clear results from the first trial\r
-// size_t i;\r
-// if (coreid == 0)\r
-// for (i=0; i < ARRAY_SIZE; i++)\r
-// results_data[i] = 0;\r
-// barrier(nc);\r
- \r
- \r
- // Execute your faster matmul\r
- barrier(nc);\r
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));\r
- \r
-#ifdef DEBUG\r
- printArrayMT("results:", ARRAY_SIZE, results_data);\r
- printArrayMT("verify :", ARRAY_SIZE, verify_data);\r
-#endif\r
- \r
- // verify\r
- verifyMT(ARRAY_SIZE, results_data, verify_data);\r
- barrier(nc);\r
- \r
- exit(0);\r
-}\r
-\r
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- size_t i;
- for (i = coreid*n/ncores; i < (coreid+1)*n/ncores; i++){
- x[i] = x[i] + y[i];
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) // Adds the product A*B into C, all indexed as 32-wide row-major arrays; lda is never read in the body.
+{ // Rows of C are split by core: coreid 0 handles j = 0..15, coreid 1 (or a single-core run) handles j = 16..31.
+ int i, j, k, ii, jj, kk; // ii and jj are never used by live code
+ if(coreid > 1) return; // cores beyond the first two do no work
+ if (coreid == 0) { // first half of the rows: j = 0..15
+// for ( ii = 0; ii < 32; ii+=IC )
+ for ( kk = 0; kk < 32; kk+=16 ) // walk k in two chunks of 16 (kk = 0, then 16)
+ for ( j = 0; j < 16; j++ )
+// for ( j = 0; j < 16; j++ )
+ {
+ for ( i = 0; i < 32; i+=8 ) // 8 adjacent elements of row j per pass
+// for ( i = ii; i < ii + IC && i < 32; i+=8 )
+ {
+ data_t temp0 = C[i+j*32]; // start from the current C values so each kk chunk adds to the previous one
+ data_t temp1 = C[i+j*32+1];
+ data_t temp2 = C[i+j*32+2];
+ data_t temp3 = C[i+j*32+3];
+ data_t temp4 = C[i+j*32+4];
+ data_t temp5 = C[i+j*32+5];
+ data_t temp6 = C[i+j*32+6];
+ data_t temp7 = C[i+j*32+7];
+ for ( k = kk; k < kk+16 && k < 32; k++ ) // kk+16 never exceeds 32 here, so the k < 32 test never limits the loop
+// for ( k = 0; k < 32; k++ )
+ {
+ data_t tempA = A[j*32+k]; // one A element reused for all 8 products
+ temp0 += tempA * B[k*32 + i];
+ temp1 += tempA * B[k*32 + i+1];
+ temp2 += tempA * B[k*32 + i+2];
+ temp3 += tempA * B[k*32 + i+3];
+ temp4 += tempA * B[k*32 + i+4];
+ temp5 += tempA * B[k*32 + i+5];
+ temp6 += tempA * B[k*32 + i+6];
+ temp7 += tempA * B[k*32 + i+7];
+ }
+ C[i+j*32] = temp0; // write the 8 running sums back to C
+ C[i+j*32+1] = temp1;
+ C[i+j*32+2] = temp2;
+ C[i+j*32+3] = temp3;
+ C[i+j*32+4] = temp4;
+ C[i+j*32+5] = temp5;
+ C[i+j*32+6] = temp6;
+ C[i+j*32+7] = temp7;
+ }
+ }
+ }
+ if(coreid == 1 || ncores == 1) { // second half of the rows: j = 16..31; when ncores == 1, core 0 runs this block as well
+// for ( ii = 0; ii < 32; ii+=IC )
+ for ( kk = 0; kk < 32; kk+=16 ) // same two-chunk walk over k as the first half
+ for ( j = 16; j < 32; j++ )
+// for ( j = 16; j < 32; j++ )
+ {
+ for ( i = 0; i < 32; i+=8 ) // 8 adjacent elements of row j per pass
+// for ( i = ii; i < ii + IC && i < 32; i+=8 )
+ {
+ data_t temp0 = C[i+j*32]; // start from the current C values so each kk chunk adds to the previous one
+ data_t temp1 = C[i+j*32+1];
+ data_t temp2 = C[i+j*32+2];
+ data_t temp3 = C[i+j*32+3];
+ data_t temp4 = C[i+j*32+4];
+ data_t temp5 = C[i+j*32+5];
+ data_t temp6 = C[i+j*32+6];
+ data_t temp7 = C[i+j*32+7];
+ for ( k = kk; k < kk+16 && k < 32; k++ ) // kk+16 never exceeds 32 here, so the k < 32 test never limits the loop
+ {
+ data_t tempA = A[j*32+k]; // one A element reused for all 8 products
+ temp0 += tempA * B[k*32 + i];
+ temp1 += tempA * B[k*32 + i+1];
+ temp2 += tempA * B[k*32 + i+2];
+ temp3 += tempA * B[k*32 + i+3];
+ temp4 += tempA * B[k*32 + i+4];
+ temp5 += tempA * B[k*32 + i+5];
+ temp6 += tempA * B[k*32 + i+6];
+ temp7 += tempA * B[k*32 + i+7];
+ }
+ C[i+j*32] = temp0; // write the 8 running sums back to C
+ C[i+j*32+1] = temp1;
+ C[i+j*32+2] = temp2;
+ C[i+j*32+3] = temp3;
+ C[i+j*32+4] = temp4;
+ C[i+j*32+5] = temp5;
+ C[i+j*32+6] = temp6;
+ C[i+j*32+7] = temp7;
+ }
+
+ }
+ }
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-///*
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-//*/
- /*
- int i, j, k, kk;
- if (coreid) {
- for ( i = 0; i < 16; i+=8 )
- {
- for ( j = 0; j < 32; j++ )
- {
- data_t temp0 = 0;
- data_t temp1 = 0;
- data_t temp2 = 0;
- data_t temp3 = 0;
- data_t temp4 = 0;
- data_t temp5 = 0;
- data_t temp6 = 0;
- data_t temp7 = 0;
- for ( kk = 0; kk < 32; kk+=8 )
- for ( k = kk; k < kk+8; k++ )
-// for ( k = 0; k < 32; k++ )
- {
- data_t tempA = A[j*32+k];
- temp0 += tempA * B[k*32 + i];
- temp1 += tempA * B[k*32 + i+1];
- temp2 += tempA * B[k*32 + i+2];
- temp3 += tempA * B[k*32 + i+3];
- temp4 += tempA * B[k*32 + i+4];
- temp5 += tempA * B[k*32 + i+5];
- temp6 += tempA * B[k*32 + i+6];
- temp7 += tempA * B[k*32 + i+7];
- }
- C[i+j*32] = temp0;
- C[i+j*32+1] = temp1;
- C[i+j*32+2] = temp2;
- C[i+j*32+3] = temp3;
- C[i+j*32+4] = temp4;
- C[i+j*32+5] = temp5;
- C[i+j*32+6] = temp6;
- C[i+j*32+7] = temp7;
- }
- }
- } else {
- for ( i = 16; i < 32; i+=8 )
- {
- for ( j = 0; j < 32; j++ )
- {
- data_t temp0 = 0;
- data_t temp1 = 0;
- data_t temp2 = 0;
- data_t temp3 = 0;
- data_t temp4 = 0;
- data_t temp5 = 0;
- data_t temp6 = 0;
- data_t temp7 = 0;
- for ( kk = 0; kk < 32; kk+=8 )
- for ( k = kk; k < kk+8; k++ )
- {
- data_t tempA = A[j*32+k];
- temp0 += tempA * B[k*32 + i];
- temp1 += tempA * B[k*32 + i+1];
- temp2 += tempA * B[k*32 + i+2];
- temp3 += tempA * B[k*32 + i+3];
- temp4 += tempA * B[k*32 + i+4];
- temp5 += tempA * B[k*32 + i+5];
- temp6 += tempA * B[k*32 + i+6];
- temp7 += tempA * B[k*32 + i+7];
- }
- C[i+j*32] = temp0;
- C[i+j*32+1] = temp1;
- C[i+j*32+2] = temp2;
- C[i+j*32+3] = temp3;
- C[i+j*32+4] = temp4;
- C[i+j*32+5] = temp5;
- C[i+j*32+6] = temp6;
- C[i+j*32+7] = temp7;
- }
-
- }
- }
- */
-}
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
- int i, j, k, ii, jj, kk;
- if (coreid) {
-// for ( ii = 0; ii < 32; ii+=IC )
- for ( kk = 0; kk < 32; kk+=16 )
- for ( j = 0; j < 16; j++ )
-// for ( j = 0; j < 16; j++ )
- {
- for ( i = 0; i < 32; i+=8 )
-// for ( i = ii; i < ii + IC && i < 32; i+=8 )
- {
- data_t temp0 = C[i+j*32];
- data_t temp1 = C[i+j*32+1];
- data_t temp2 = C[i+j*32+2];
- data_t temp3 = C[i+j*32+3];
- data_t temp4 = C[i+j*32+4];
- data_t temp5 = C[i+j*32+5];
- data_t temp6 = C[i+j*32+6];
- data_t temp7 = C[i+j*32+7];
- for ( k = kk; k < kk+16 && k < 32; k++ )
-// for ( k = 0; k < 32; k++ )
- {
- data_t tempA = A[j*32+k];
- temp0 += tempA * B[k*32 + i];
- temp1 += tempA * B[k*32 + i+1];
- temp2 += tempA * B[k*32 + i+2];
- temp3 += tempA * B[k*32 + i+3];
- temp4 += tempA * B[k*32 + i+4];
- temp5 += tempA * B[k*32 + i+5];
- temp6 += tempA * B[k*32 + i+6];
- temp7 += tempA * B[k*32 + i+7];
- }
- C[i+j*32] = temp0;
- C[i+j*32+1] = temp1;
- C[i+j*32+2] = temp2;
- C[i+j*32+3] = temp3;
- C[i+j*32+4] = temp4;
- C[i+j*32+5] = temp5;
- C[i+j*32+6] = temp6;
- C[i+j*32+7] = temp7;
- }
- }
- } else {
-// for ( ii = 0; ii < 32; ii+=IC )
- for ( kk = 0; kk < 32; kk+=16 )
- for ( j = 16; j < 32; j++ )
-// for ( j = 16; j < 32; j++ )
- {
- for ( i = 0; i < 32; i+=8 )
-// for ( i = ii; i < ii + IC && i < 32; i+=8 )
- {
- data_t temp0 = C[i+j*32];
- data_t temp1 = C[i+j*32+1];
- data_t temp2 = C[i+j*32+2];
- data_t temp3 = C[i+j*32+3];
- data_t temp4 = C[i+j*32+4];
- data_t temp5 = C[i+j*32+5];
- data_t temp6 = C[i+j*32+6];
- data_t temp7 = C[i+j*32+7];
- for ( k = kk; k < kk+16 && k < 32; k++ )
- {
- data_t tempA = A[j*32+k];
- temp0 += tempA * B[k*32 + i];
- temp1 += tempA * B[k*32 + i+1];
- temp2 += tempA * B[k*32 + i+2];
- temp3 += tempA * B[k*32 + i+3];
- temp4 += tempA * B[k*32 + i+4];
- temp5 += tempA * B[k*32 + i+5];
- temp6 += tempA * B[k*32 + i+6];
- temp7 += tempA * B[k*32 + i+7];
- }
- C[i+j*32] = temp0;
- C[i+j*32+1] = temp1;
- C[i+j*32+2] = temp2;
- C[i+j*32+3] = temp3;
- C[i+j*32+4] = temp4;
- C[i+j*32+5] = temp5;
- C[i+j*32+6] = temp6;
- C[i+j*32+7] = temp7;
- }
-
- }
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-///*
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-//*/
- /*
- int i, j, k, kk;
- if (coreid) {
- for ( i = 0; i < 16; i+=8 )
- {
- for ( j = 0; j < 32; j++ )
- {
- data_t temp0 = 0;
- data_t temp1 = 0;
- data_t temp2 = 0;
- data_t temp3 = 0;
- data_t temp4 = 0;
- data_t temp5 = 0;
- data_t temp6 = 0;
- data_t temp7 = 0;
- for ( kk = 0; kk < 32; kk+=8 )
- for ( k = kk; k < kk+8; k++ )
-// for ( k = 0; k < 32; k++ )
- {
- data_t tempA = A[j*32+k];
- temp0 += tempA * B[k*32 + i];
- temp1 += tempA * B[k*32 + i+1];
- temp2 += tempA * B[k*32 + i+2];
- temp3 += tempA * B[k*32 + i+3];
- temp4 += tempA * B[k*32 + i+4];
- temp5 += tempA * B[k*32 + i+5];
- temp6 += tempA * B[k*32 + i+6];
- temp7 += tempA * B[k*32 + i+7];
- }
- C[i+j*32] = temp0;
- C[i+j*32+1] = temp1;
- C[i+j*32+2] = temp2;
- C[i+j*32+3] = temp3;
- C[i+j*32+4] = temp4;
- C[i+j*32+5] = temp5;
- C[i+j*32+6] = temp6;
- C[i+j*32+7] = temp7;
- }
- }
- } else {
- for ( i = 16; i < 32; i+=8 )
- {
- for ( j = 0; j < 32; j++ )
- {
- data_t temp0 = 0;
- data_t temp1 = 0;
- data_t temp2 = 0;
- data_t temp3 = 0;
- data_t temp4 = 0;
- data_t temp5 = 0;
- data_t temp6 = 0;
- data_t temp7 = 0;
- for ( kk = 0; kk < 32; kk+=8 )
- for ( k = kk; k < kk+8; k++ )
- {
- data_t tempA = A[j*32+k];
- temp0 += tempA * B[k*32 + i];
- temp1 += tempA * B[k*32 + i+1];
- temp2 += tempA * B[k*32 + i+2];
- temp3 += tempA * B[k*32 + i+3];
- temp4 += tempA * B[k*32 + i+4];
- temp5 += tempA * B[k*32 + i+5];
- temp6 += tempA * B[k*32 + i+6];
- temp7 += tempA * B[k*32 + i+7];
- }
- C[i+j*32] = temp0;
- C[i+j*32+1] = temp1;
- C[i+j*32+2] = temp2;
- C[i+j*32+3] = temp3;
- C[i+j*32+4] = temp4;
- C[i+j*32+5] = temp5;
- C[i+j*32+6] = temp6;
- C[i+j*32+7] = temp7;
- }
-
- }
- }
- */
-}
-
-
-#define KC 16
-#define IC 16
-#define JC 16
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
- int i, j, k, ii, jj, kk;
- if (coreid) {
-// for ( ii = 0; ii < 32; ii+=IC )
- for ( jj = 0; jj < 16; jj+=16 )
- for ( kk = 0; kk < 32; kk+=16 )
- for ( j = jj; j < jj+16 && j < 16; j++ )
-// for ( j = 0; j < 16; j++ )
- {
- for ( i = 0; i < 32; i+=8 )
-// for ( i = ii; i < ii + IC && i < 32; i+=8 )
- {
- data_t temp0 = C[i+j*32];
- data_t temp1 = C[i+j*32+1];
- data_t temp2 = C[i+j*32+2];
- data_t temp3 = C[i+j*32+3];
- data_t temp4 = C[i+j*32+4];
- data_t temp5 = C[i+j*32+5];
- data_t temp6 = C[i+j*32+6];
- data_t temp7 = C[i+j*32+7];
- for ( k = kk; k < kk+16 && k < 32; k++ )
-// for ( k = 0; k < 32; k++ )
- {
- data_t tempA = A[j*32+k];
- temp0 += tempA * B[k*32 + i];
- temp1 += tempA * B[k*32 + i+1];
- temp2 += tempA * B[k*32 + i+2];
- temp3 += tempA * B[k*32 + i+3];
- temp4 += tempA * B[k*32 + i+4];
- temp5 += tempA * B[k*32 + i+5];
- temp6 += tempA * B[k*32 + i+6];
- temp7 += tempA * B[k*32 + i+7];
- }
- C[i+j*32] = temp0;
- C[i+j*32+1] = temp1;
- C[i+j*32+2] = temp2;
- C[i+j*32+3] = temp3;
- C[i+j*32+4] = temp4;
- C[i+j*32+5] = temp5;
- C[i+j*32+6] = temp6;
- C[i+j*32+7] = temp7;
- }
- }
- } else {
-// for ( ii = 0; ii < 32; ii+=IC )
- for ( jj = 16; jj < 32; jj+= 16 ) {
- for ( kk = 16; kk < 32; kk+=16 )
- for ( j = jj; j < jj+16 && j < 32; j++ )
-// for ( j = 16; j < 32; j++ )
- {
- for ( i = 0; i < 32; i+=8 )
-// for ( i = ii; i < ii + IC && i < 32; i+=8 )
- {
- data_t temp0 = C[i+j*32];
- data_t temp1 = C[i+j*32+1];
- data_t temp2 = C[i+j*32+2];
- data_t temp3 = C[i+j*32+3];
- data_t temp4 = C[i+j*32+4];
- data_t temp5 = C[i+j*32+5];
- data_t temp6 = C[i+j*32+6];
- data_t temp7 = C[i+j*32+7];
- for ( k = kk; k < kk+16 && k < 32; k++ )
- {
- data_t tempA = A[j*32+k];
- temp0 += tempA * B[k*32 + i];
- temp1 += tempA * B[k*32 + i+1];
- temp2 += tempA * B[k*32 + i+2];
- temp3 += tempA * B[k*32 + i+3];
- temp4 += tempA * B[k*32 + i+4];
- temp5 += tempA * B[k*32 + i+5];
- temp6 += tempA * B[k*32 + i+6];
- temp7 += tempA * B[k*32 + i+7];
- }
- C[i+j*32] = temp0;
- C[i+j*32+1] = temp1;
- C[i+j*32+2] = temp2;
- C[i+j*32+3] = temp3;
- C[i+j*32+4] = temp4;
- C[i+j*32+5] = temp5;
- C[i+j*32+6] = temp6;
- C[i+j*32+7] = temp7;
- }
-
- }
- for ( kk = 0; kk < 16; kk+=16 )
- for ( j = jj; j < jj+16 && j < 32; j++ )
-// for ( j = 16; j < 32; j++ )
- {
- for ( i = 0; i < 32; i+=8 )
-// for ( i = ii; i < ii + IC && i < 32; i+=8 )
- {
- data_t temp0 = C[i+j*32];
- data_t temp1 = C[i+j*32+1];
- data_t temp2 = C[i+j*32+2];
- data_t temp3 = C[i+j*32+3];
- data_t temp4 = C[i+j*32+4];
- data_t temp5 = C[i+j*32+5];
- data_t temp6 = C[i+j*32+6];
- data_t temp7 = C[i+j*32+7];
- for ( k = kk; k < kk+16 && k < 32; k++ )
- {
- data_t tempA = A[j*32+k];
- temp0 += tempA * B[k*32 + i];
- temp1 += tempA * B[k*32 + i+1];
- temp2 += tempA * B[k*32 + i+2];
- temp3 += tempA * B[k*32 + i+3];
- temp4 += tempA * B[k*32 + i+4];
- temp5 += tempA * B[k*32 + i+5];
- temp6 += tempA * B[k*32 + i+6];
- temp7 += tempA * B[k*32 + i+7];
- }
- C[i+j*32] = temp0;
- C[i+j*32+1] = temp1;
- C[i+j*32+2] = temp2;
- C[i+j*32+3] = temp3;
- C[i+j*32+4] = temp4;
- C[i+j*32+5] = temp5;
- C[i+j*32+6] = temp6;
- C[i+j*32+7] = temp7;
- }
-
- }
- }
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-///*
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-//*/
- /*
- int i, j, k, kk;
- if (coreid) {
- for ( i = 0; i < 16; i+=8 )
- {
- for ( j = 0; j < 32; j++ )
- {
- data_t temp0 = 0;
- data_t temp1 = 0;
- data_t temp2 = 0;
- data_t temp3 = 0;
- data_t temp4 = 0;
- data_t temp5 = 0;
- data_t temp6 = 0;
- data_t temp7 = 0;
- for ( kk = 0; kk < 32; kk+=8 )
- for ( k = kk; k < kk+8; k++ )
-// for ( k = 0; k < 32; k++ )
- {
- data_t tempA = A[j*32+k];
- temp0 += tempA * B[k*32 + i];
- temp1 += tempA * B[k*32 + i+1];
- temp2 += tempA * B[k*32 + i+2];
- temp3 += tempA * B[k*32 + i+3];
- temp4 += tempA * B[k*32 + i+4];
- temp5 += tempA * B[k*32 + i+5];
- temp6 += tempA * B[k*32 + i+6];
- temp7 += tempA * B[k*32 + i+7];
- }
- C[i+j*32] = temp0;
- C[i+j*32+1] = temp1;
- C[i+j*32+2] = temp2;
- C[i+j*32+3] = temp3;
- C[i+j*32+4] = temp4;
- C[i+j*32+5] = temp5;
- C[i+j*32+6] = temp6;
- C[i+j*32+7] = temp7;
- }
- }
- } else {
- for ( i = 16; i < 32; i+=8 )
- {
- for ( j = 0; j < 32; j++ )
- {
- data_t temp0 = 0;
- data_t temp1 = 0;
- data_t temp2 = 0;
- data_t temp3 = 0;
- data_t temp4 = 0;
- data_t temp5 = 0;
- data_t temp6 = 0;
- data_t temp7 = 0;
- for ( kk = 0; kk < 32; kk+=8 )
- for ( k = kk; k < kk+8; k++ )
- {
- data_t tempA = A[j*32+k];
- temp0 += tempA * B[k*32 + i];
- temp1 += tempA * B[k*32 + i+1];
- temp2 += tempA * B[k*32 + i+2];
- temp3 += tempA * B[k*32 + i+3];
- temp4 += tempA * B[k*32 + i+4];
- temp5 += tempA * B[k*32 + i+5];
- temp6 += tempA * B[k*32 + i+6];
- temp7 += tempA * B[k*32 + i+7];
- }
- C[i+j*32] = temp0;
- C[i+j*32+1] = temp1;
- C[i+j*32+2] = temp2;
- C[i+j*32+3] = temp3;
- C[i+j*32+4] = temp4;
- C[i+j*32+5] = temp5;
- C[i+j*32+6] = temp6;
- C[i+j*32+7] = temp7;
- }
-
- }
- }
- */
-}
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
- int i, j, k, ii, jj, kk;
- if (coreid) {
-// for ( ii = 0; ii < 32; ii+=IC )
- for ( kk = 0; kk < 32; kk+=16 )
- for ( j = 0; j < 16; j++ )
-// for ( j = 0; j < 16; j++ )
- {
- for ( i = 0; i < 32; i+=8 )
-// for ( i = ii; i < ii + IC && i < 32; i+=8 )
- {
- data_t temp0 = C[i+j*32];
- data_t temp1 = C[i+j*32+1];
- data_t temp2 = C[i+j*32+2];
- data_t temp3 = C[i+j*32+3];
- data_t temp4 = C[i+j*32+4];
- data_t temp5 = C[i+j*32+5];
- data_t temp6 = C[i+j*32+6];
- data_t temp7 = C[i+j*32+7];
- for ( k = kk; k < kk+16 && k < 32; k++ )
-// for ( k = 0; k < 32; k++ )
- {
- data_t tempA = A[j*32+k];
- temp0 += tempA * B[k*32 + i];
- temp1 += tempA * B[k*32 + i+1];
- temp2 += tempA * B[k*32 + i+2];
- temp3 += tempA * B[k*32 + i+3];
- temp4 += tempA * B[k*32 + i+4];
- temp5 += tempA * B[k*32 + i+5];
- temp6 += tempA * B[k*32 + i+6];
- temp7 += tempA * B[k*32 + i+7];
- }
- C[i+j*32] = temp0;
- C[i+j*32+1] = temp1;
- C[i+j*32+2] = temp2;
- C[i+j*32+3] = temp3;
- C[i+j*32+4] = temp4;
- C[i+j*32+5] = temp5;
- C[i+j*32+6] = temp6;
- C[i+j*32+7] = temp7;
- }
- }
- } else {
-// for ( ii = 0; ii < 32; ii+=IC )
- for ( kk = 0; kk < 32; kk+=16 )
- for ( j = 16; j < 32; j++ )
-// for ( j = 16; j < 32; j++ )
- {
- for ( i = 0; i < 32; i+=8 )
-// for ( i = ii; i < ii + IC && i < 32; i+=8 )
- {
- data_t temp0 = C[i+j*32];
- data_t temp1 = C[i+j*32+1];
- data_t temp2 = C[i+j*32+2];
- data_t temp3 = C[i+j*32+3];
- data_t temp4 = C[i+j*32+4];
- data_t temp5 = C[i+j*32+5];
- data_t temp6 = C[i+j*32+6];
- data_t temp7 = C[i+j*32+7];
- for ( k = kk; k < kk+16 && k < 32; k++ )
- {
- data_t tempA = A[j*32+k];
- temp0 += tempA * B[k*32 + i];
- temp1 += tempA * B[k*32 + i+1];
- temp2 += tempA * B[k*32 + i+2];
- temp3 += tempA * B[k*32 + i+3];
- temp4 += tempA * B[k*32 + i+4];
- temp5 += tempA * B[k*32 + i+5];
- temp6 += tempA * B[k*32 + i+6];
- temp7 += tempA * B[k*32 + i+7];
- }
- C[i+j*32] = temp0;
- C[i+j*32+1] = temp1;
- C[i+j*32+2] = temp2;
- C[i+j*32+3] = temp3;
- C[i+j*32+4] = temp4;
- C[i+j*32+5] = temp5;
- C[i+j*32+6] = temp6;
- C[i+j*32+7] = temp7;
- }
-
- }
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- size_t i;
- if (coreid) {
- for (i = 0; i < n / 2; i++)
- x[i] = x[i] + y[i];
- } else {
- for (i = n / 2; i < n; i++)
- x[i] = x[i] + y[i];
- }
-/*
- for ( i = (coreid * n) / ncores; i < ((coreid+1)*n)/ncores; i++ ) {
- x[i] = x[i] + y[i];
- }
-*/
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+//--------------------------------------------------------------------------
+// Accumulates one output element: C[row][col] += sum_k A[row][k] * B[k][col].
+// All three matrices are lda x lda and stored row-major. Used for the edge
+// rows/columns that do not fill a complete 4x4 tile.
+static inline void matmul_cell(const int lda, const int row, const int col, const data_t A[], const data_t B[], data_t C[])
+{
+  int k;
+  data_t acc = C[col + row*lda];
+
+  for (k = 0; k < lda; k++)
+  {
+    acc += A[k + row*lda] * B[col + k*lda];
+  }
+
+  C[col + row*lda] = acc;
+}
+
+//--------------------------------------------------------------------------
+// Multi-threaded matrix multiply: C += A * B for row-major lda x lda matrices.
+//
+// Work is split by rows of C. Core `coreid` owns the contiguous band of
+// lda/ncores rows starting at (lda/ncores)*coreid; the last core additionally
+// takes whatever rows remain when lda is not a multiple of ncores. Bands never
+// overlap, so no two cores write the same element of C.
+//
+// Inside a band the result is computed in 4x4 tiles whose 16 partial sums are
+// held in locals for the whole k loop. Rows and columns that do not fill a
+// complete tile are finished one element at a time, so any lda is handled
+// without reading or writing outside the matrices.
+//
+// coreid : index of the calling core, 0 <= coreid < ncores
+// ncores : number of cores cooperating on the multiply
+// lda    : matrix dimension
+// A, B   : input matrices (read only)
+// C      : output matrix; products are accumulated into its current contents
+//
+// feel free to make a separate function for MI and MSI versions.
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+  int i, j, k, r;
+
+  // Nothing sensible can be done with an empty matrix or an invalid core id;
+  // returning early also avoids dividing by zero below.
+  if (lda <= 0 || ncores <= 0 || coreid < 0 || coreid >= ncores)
+  {
+    return;
+  }
+
+  const int rows_per_core = lda / ncores;
+  const int row_begin = rows_per_core * coreid;
+  // The last core picks up the remainder rows of the integer division.
+  const int row_end = (coreid == ncores - 1) ? lda : row_begin + rows_per_core;
+
+  // Exclusive bounds of the region that can be covered by complete 4x4 tiles.
+  const int tiled_row_end = row_begin + (row_end - row_begin) / 4 * 4;
+  const int tiled_col_end = lda / 4 * 4;
+
+  for (i = row_begin; i < tiled_row_end; i += 4)
+  {
+    const data_t *a0 = &A[(i + 0)*lda];
+    const data_t *a1 = &A[(i + 1)*lda];
+    const data_t *a2 = &A[(i + 2)*lda];
+    const data_t *a3 = &A[(i + 3)*lda];
+
+    for (j = 0; j < tiled_col_end; j += 4)
+    {
+      data_t *c0 = &C[j + (i + 0)*lda];
+      data_t *c1 = &C[j + (i + 1)*lda];
+      data_t *c2 = &C[j + (i + 2)*lda];
+      data_t *c3 = &C[j + (i + 3)*lda];
+
+      // Start from the current contents of C so the call accumulates.
+      data_t c00 = c0[0], c01 = c0[1], c02 = c0[2], c03 = c0[3];
+      data_t c10 = c1[0], c11 = c1[1], c12 = c1[2], c13 = c1[3];
+      data_t c20 = c2[0], c21 = c2[1], c22 = c2[2], c23 = c2[3];
+      data_t c30 = c3[0], c31 = c3[1], c32 = c3[2], c33 = c3[3];
+
+      for (k = 0; k < lda; k++)
+      {
+        // One row segment of B is shared by all four rows of the tile.
+        const data_t *b = &B[j + k*lda];
+        const data_t b0 = b[0];
+        const data_t b1 = b[1];
+        const data_t b2 = b[2];
+        const data_t b3 = b[3];
+        data_t a;
+
+        a = a0[k];
+        c00 += a*b0;
+        c01 += a*b1;
+        c02 += a*b2;
+        c03 += a*b3;
+
+        a = a1[k];
+        c10 += a*b0;
+        c11 += a*b1;
+        c12 += a*b2;
+        c13 += a*b3;
+
+        a = a2[k];
+        c20 += a*b0;
+        c21 += a*b1;
+        c22 += a*b2;
+        c23 += a*b3;
+
+        a = a3[k];
+        c30 += a*b0;
+        c31 += a*b1;
+        c32 += a*b2;
+        c33 += a*b3;
+      }
+
+      c0[0] = c00; c0[1] = c01; c0[2] = c02; c0[3] = c03;
+      c1[0] = c10; c1[1] = c11; c1[2] = c12; c1[3] = c13;
+      c2[0] = c20; c2[1] = c21; c2[2] = c22; c2[3] = c23;
+      c3[0] = c30; c3[1] = c31; c3[2] = c32; c3[3] = c33;
+    }
+
+    // Columns to the right of the last complete tile (only when lda % 4 != 0).
+    for (r = i; r < i + 4; r++)
+    {
+      for (j = tiled_col_end; j < lda; j++)
+      {
+        matmul_cell(lda, r, j, A, B, C);
+      }
+    }
+  }
+
+  // Rows below the last complete tile of this core's band.
+  for (i = tiled_row_end; i < row_end; i++)
+  {
+    for (j = 0; j < lda; j++)
+    {
+      matmul_cell(lda, i, j, A, B, C);
+    }
+  }
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s // turn the argument into a string literal exactly as written
-#define stringify(s) stringify_1(s) // extra level so macros inside s are expanded before stringizing
-#define stats(code) do { /* run `code` between two reads of the cycle and instruction counters; core 0 prints the result */ \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); /* start at minus the current counts ... */ \
- code; \
- _c += rdcycle(), _i += rdinstret(); /* ... so adding the new counts leaves the elapsed amounts */ \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); /* iteration count is DIM_SIZE^3; each ratio is printed as integer part plus one decimal digit */ \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-/*
- * Print a label followed by the first n elements of arr on one line.
- * Only the core whose coreid is 0 prints; every other core returns at once.
- *
- *   name  label, printed right-justified in a 10-character field
- *   n     number of elements to print
- *   arr   values to print; each is cast to long before printing
- */
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-/*
- * Compare test[0..n-1] with correct[0..n-1] element by element.
- * Runs on core 0 only; other cores return immediately. On the first mismatch
- * it prints the index and both values (cast to long) and calls exit(-1).
- * If every element matches it simply returns.
- *
- * NOTE(review): i is a size_t but is printed with %d, so the format specifier
- * and the argument type do not match.
- */
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i]) // exact comparison, no tolerance
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-/*
- * single-thread, naive version
- *
- * For every i and j in [0, lda) adds the sum over k of
- * A[j*lda + k] * B[k*lda + i] to C[i + j*lda]. C is accumulated into, not
- * overwritten, so whatever it holds on entry is part of the result.
- * Only the core with coreid 0 does the work; all other cores return at once.
- */
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-/*
- * Multi-core matrix multiply: accumulates A*B into C for lda x lda matrices
- * held in flat arrays, where element (row, col) lives at index col + row*lda.
- *
- * The rows of C are divided between cores: space = lda/ncores rows each, and
- * core `coreid` starts at row space*coreid. Every core except the last stops
- * at its own upper bound (max, rounded down to a multiple of 4); the last
- * core runs on to lda/4*4, so it also takes any rows left over by the
- * integer division.
- *
- * Work is done on 4x4 tiles of C: the 16 tile values are loaded into locals,
- * updated across the whole k loop, and written back once per tile.
- * C is read before it is accumulated into, so its contents on entry are part
- * of the result.
- *
- * NOTE(review): neither the i loop nor the j loop has a remainder path, so
- * this looks like it assumes lda and each core's starting row are multiples
- * of 4 -- confirm against the callers.
- */
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
- int i, j, k;
- int space=lda/ncores; // rows of C handled by each core
- int max= space*coreid+space; // one past the last row of this core's share
- data_t temp=0; // current A value for row i
-
- data_t temp1=0; // accumulators for row i, columns j..j+3
- data_t temp2=0;
- data_t temp3=0;
- data_t temp4=0;
-
- data_t temp_1=0; // current A value for row i+1
-
- data_t temp1_1=0; // accumulators for row i+1, columns j..j+3
- data_t temp2_1=0;
- data_t temp3_1=0;
- data_t temp4_1=0;
-
- data_t temp_2=0; // current A value for row i+2
-
- data_t temp1_2=0; // accumulators for row i+2, columns j..j+3
- data_t temp2_2=0;
- data_t temp3_2=0;
- data_t temp4_2=0;
-
- data_t temp_3=0; // current A value for row i+3
-
- data_t temp1_3=0; // accumulators for row i+3, columns j..j+3
- data_t temp2_3=0;
- data_t temp3_3=0;
- data_t temp4_3=0;
-
- if (coreid!=ncores-1){
- //main loop: every core except the last, bounded by its own share of rows
- for (i=space*coreid;i<max/4*4;i+=4)
- {
- for(j=0;j<lda;j+=4)
- {
- temp1=C[j+i*lda]; // load the 4x4 tile of C into the accumulators
- temp2=C[j+1+i*lda];
- temp3=C[j+2+i*lda];
- temp4=C[j+3+i*lda];
-
- temp1_1=C[j+(i+1)*lda];
- temp2_1=C[j+1+(i+1)*lda];
- temp3_1=C[j+2+(i+1)*lda];
- temp4_1=C[j+3+(i+1)*lda];
-
- temp1_2=C[j+(i+2)*lda];
- temp2_2=C[j+1+(i+2)*lda];
- temp3_2=C[j+2+(i+2)*lda];
- temp4_2=C[j+3+(i+2)*lda];
-
- temp1_3=C[j+(i+3)*lda];
- temp2_3=C[j+1+(i+3)*lda];
- temp3_3=C[j+2+(i+3)*lda];
- temp4_3=C[j+3+(i+3)*lda];
- for (k=0;k<lda;k++)
- {
- temp=A[k+i*lda]; // one A value per row, reused for four columns of B
- temp1+=temp*B[j+k*lda];
- temp2+=temp*B[j+1+k*lda];
- temp3+=temp*B[j+2+k*lda];
- temp4+=temp*B[j+3+k*lda];
-
- temp_1=A[k+(i+1)*lda];
- temp1_1+=temp_1*B[j+k*lda];
- temp2_1+=temp_1*B[j+1+k*lda];
- temp3_1+=temp_1*B[j+2+k*lda];
- temp4_1+=temp_1*B[j+3+k*lda];
-
- temp_2=A[k+(i+2)*lda];
- temp1_2+=temp_2*B[j+k*lda];
- temp2_2+=temp_2*B[j+1+k*lda];
- temp3_2+=temp_2*B[j+2+k*lda];
- temp4_2+=temp_2*B[j+3+k*lda];
-
- temp_3=A[k+(i+3)*lda];
- temp1_3+=temp_3*B[j+k*lda];
- temp2_3+=temp_3*B[j+1+k*lda];
- temp3_3+=temp_3*B[j+2+k*lda];
- temp4_3+=temp_3*B[j+3+k*lda];
-
- }
- C[j+i*lda]=temp1; // write the finished tile back
- C[j+1+i*lda]=temp2;
- C[j+2+i*lda]=temp3;
- C[j+3+i*lda]=temp4;
-
- C[j+(i+1)*lda]=temp1_1;
- C[j+1+(i+1)*lda]=temp2_1;
- C[j+2+(i+1)*lda]=temp3_1;
- C[j+3+(i+1)*lda]=temp4_1;
-
- C[j+(i+2)*lda]=temp1_2;
- C[j+1+(i+2)*lda]=temp2_2;
- C[j+2+(i+2)*lda]=temp3_2;
- C[j+3+(i+2)*lda]=temp4_2;
-
- C[j+(i+3)*lda]=temp1_3;
- C[j+1+(i+3)*lda]=temp2_3;
- C[j+2+(i+3)*lda]=temp3_3;
- C[j+3+(i+3)*lda]=temp4_3;
-
- }
-
- }
-
-
-
- }
-
- // last core (the "second core" when ncores is 2): same tile loop, but runs to lda/4*4
- else{
- for (i=space*coreid;i<lda/4*4;i+=4)
- {
- for(j=0;j<lda;j+=4)
- {
- temp1=C[j+i*lda]; // load the 4x4 tile of C into the accumulators
- temp2=C[j+1+i*lda];
- temp3=C[j+2+i*lda];
- temp4=C[j+3+i*lda];
-
- temp1_1=C[j+(i+1)*lda];
- temp2_1=C[j+1+(i+1)*lda];
- temp3_1=C[j+2+(i+1)*lda];
- temp4_1=C[j+3+(i+1)*lda];
-
- temp1_2=C[j+(i+2)*lda];
- temp2_2=C[j+1+(i+2)*lda];
- temp3_2=C[j+2+(i+2)*lda];
- temp4_2=C[j+3+(i+2)*lda];
-
- temp1_3=C[j+(i+3)*lda];
- temp2_3=C[j+1+(i+3)*lda];
- temp3_3=C[j+2+(i+3)*lda];
- temp4_3=C[j+3+(i+3)*lda];
- for (k=0;k<lda;k++)
- {
- temp=A[k+i*lda];
- temp1+=temp*B[j+k*lda];
- temp2+=temp*B[j+1+k*lda];
- temp3+=temp*B[j+2+k*lda];
- temp4+=temp*B[j+3+k*lda];
-
- temp_1=A[k+(i+1)*lda];
- temp1_1+=temp_1*B[j+k*lda];
- temp2_1+=temp_1*B[j+1+k*lda];
- temp3_1+=temp_1*B[j+2+k*lda];
- temp4_1+=temp_1*B[j+3+k*lda];
-
- temp_2=A[k+(i+2)*lda];
- temp1_2+=temp_2*B[j+k*lda];
- temp2_2+=temp_2*B[j+1+k*lda];
- temp3_2+=temp_2*B[j+2+k*lda];
- temp4_2+=temp_2*B[j+3+k*lda];
-
- temp_3=A[k+(i+3)*lda];
- temp1_3+=temp_3*B[j+k*lda];
- temp2_3+=temp_3*B[j+1+k*lda];
- temp3_3+=temp_3*B[j+2+k*lda];
- temp4_3+=temp_3*B[j+3+k*lda];
-
- }
- C[j+i*lda]=temp1; // write the finished tile back
- C[j+1+i*lda]=temp2;
- C[j+2+i*lda]=temp3;
- C[j+3+i*lda]=temp4;
-
- C[j+(i+1)*lda]=temp1_1;
- C[j+1+(i+1)*lda]=temp2_1;
- C[j+2+(i+1)*lda]=temp3_1;
- C[j+3+(i+1)*lda]=temp4_1;
-
- C[j+(i+2)*lda]=temp1_2;
- C[j+1+(i+2)*lda]=temp2_2;
- C[j+2+(i+2)*lda]=temp3_2;
- C[j+3+(i+2)*lda]=temp4_2;
-
- C[j+(i+3)*lda]=temp1_3;
- C[j+1+(i+3)*lda]=temp2_3;
- C[j+2+(i+3)*lda]=temp3_3;
- C[j+3+(i+3)*lda]=temp4_3;
-
- }
-
- }
-
-
- }
-
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-/*
- * Per-thread entry point: records this thread's core id and the core count,
- * times matmul() on the shared result buffer, then checks the result against
- * verify_data.
- *
- *   cid  id of the calling core, stored in coreid
- *   nc   number of cores, stored in ncores and passed to every barrier() call
- *
- * Does not return to its caller: the last statement is exit(0).
- */
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // the trailing barrier is inside the timed region
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-# usage: print the option summary followed by $usageMsg, then exit.
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg"; # text captured from the heredoc near the top of the script
-
- exit();
-}
-# processCommandLine: fill %opts with defaults, then let the command line override them.
-sub processCommandLine()
-{
-
- $opts{"help"} = 0; # defaults: help off, size 1000, seed 1
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage(); # show usage when option parsing fails
- $opts{"help"} and usage(); # --help (or -?) also ends in usage()
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-# printArray(name, arrayref): print a C definition "static data_t name[ARRAY_SIZE] = {...};" with the values formatted %3d, comma separated, 20 per line.
-sub printArray
-{
- my $arrayName = $_[0]; # identifier to use for the C array
- my $arrayRef = $_[1]; # reference to the flat list of values
-
- my $numCols = 20; # values per output line
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) { # everything fits on a single line
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) { # no comma after the last element
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols); # number of completely filled lines
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) { # leftover values go on one final, shorter line
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-# mmult(m1, m2): return a new array-of-arrays reference holding the matrix product m1 x m2; the dimensions are not checked for compatibility.
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; # the element is created on first use and starts out undefined, which += treats as 0
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) } # list of indices 0 .. n-1, where n is the first argument
-
-# veclen(aref): number of elements in the referenced array; dies when the argument is not an ARRAY reference.
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-# matdim(matrix): return (rows, cols) of an array-of-arrays matrix.
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]); # column count is taken from the first row only
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-# main: build two random size x size matrices, multiply them, and print the inputs and the product as C array definitions.
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"}); # seed the generator from --seed
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4)); # integer in 0..3
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 ); # row after row, so the flat lists are in row-major order
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s // turn the argument into a string literal exactly as written
-#define stringify(s) stringify_1(s) // extra level so macros inside s are expanded before stringizing
-#define stats(code) do { /* run `code` between two reads of the cycle and instruction counters; core 0 prints the result */ \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); /* start at minus the current counts ... */ \
- code; \
- _c += rdcycle(), _i += rdinstret(); /* ... so adding the new counts leaves the elapsed amounts */ \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); /* iteration count is DIM_SIZE^3; each ratio is printed as integer part plus one decimal digit */ \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-/*
- * Print a label followed by the first n elements of arr on one line.
- * Only the core whose coreid is 0 prints; every other core returns at once.
- *
- *   name  label, printed right-justified in a 10-character field
- *   n     number of elements to print
- *   arr   values to print; each is cast to long before printing
- */
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-/*
- * Compare test[0..n-1] with correct[0..n-1] element by element.
- * Runs on core 0 only; other cores return immediately. On the first mismatch
- * it prints the index and both values (cast to long) and calls exit(-1).
- * If every element matches it simply returns.
- *
- * NOTE(review): i is a size_t but is printed with %d, so the format specifier
- * and the argument type do not match.
- */
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i]) // exact comparison, no tolerance
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-/*
- * single-thread, naive version
- *
- * For every i and j in [0, lda) adds the sum over k of
- * A[j*lda + k] * B[k*lda + i] to C[i + j*lda]. C is accumulated into, not
- * overwritten, so whatever it holds on entry is part of the result.
- * Only the core with coreid 0 does the work; all other cores return at once.
- */
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-/*
- * Multi-core matrix multiply: accumulates A*B into C for lda x lda matrices
- * held in flat arrays, where element (row, col) lives at index col + row*lda.
- *
- * Step 1: B is copied into the static buffer B1. The last core copies the
- * first half [0, lda*lda/2); every other core copies the second half.
- * Step 2: after a barrier, the rows of C are divided between cores
- * (space = lda/ncores rows each, the last core running on to lda/4*4) and
- * processed in 4x4 tiles. The last core reads its B values from B1; the
- * other cores read them from B.
- *
- * C is read before it is accumulated into, so its contents on entry are part
- * of the result.
- *
- * NOTE(review): `nc` in barrier(nc) is not a parameter or local of this
- * function; `ncores` looks like what was meant -- confirm.
- * NOTE(review): B1 has a fixed size of 32*32 elements, so lda*lda must not
- * exceed that.
- * NOTE(review): the i and j loops have no remainder path, so rows/columns
- * beyond a multiple of 4 appear to be left untouched -- confirm lda is always
- * a multiple of 4.
- */
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
- int i, j, k;
- int space=lda/ncores; // rows of C handled by each core
- int max= space*coreid+space; // one past the last row of this core's share
- static data_t B1[32*32]; // second copy of B, read by the last core below
- if (coreid==ncores-1){
- for (i=0; i<lda*lda/2;i++) // last core fills the first half of B1
- {
- B1[i]=B[i];
- }
- }
- else{
- for (i=lda*lda/2;i<lda*lda;i++) // every other core fills the second half
- B1[i]=B[i];
- }
- data_t temp=0; // accumulators for column j, rows i..i+3
- data_t temp1=0;
- data_t temp2=0;
- data_t temp3=0;
- data_t tempB=0; // current B value for column j
-
- data_t temp_1=0; // accumulators for column j+1, rows i..i+3
- data_t temp1_1=0;
- data_t temp2_1=0;
- data_t temp3_1=0;
- data_t tempB_1=0; // current B value for column j+1
-
- data_t temp_2=0; // accumulators for column j+2, rows i..i+3
- data_t temp1_2=0;
- data_t temp2_2=0;
- data_t temp3_2=0;
- data_t tempB_2=0; // current B value for column j+2
-
- data_t temp_3=0; // accumulators for column j+3, rows i..i+3
- data_t temp1_3=0;
- data_t temp2_3=0;
- data_t temp3_3=0;
- data_t tempB_3=0; // current B value for column j+3
- barrier(nc); // wait until both halves of B1 are written before anyone reads it
- if (coreid!=ncores-1){
- for (i=space*coreid;i<max/4*4;i+=4)
- {
- for(j=0;j<lda/4*4;j+=4)
- {
- temp=C[j+i*lda]; // load the 4x4 tile of C into the accumulators
- temp1=C[j+(i+1)*lda];
- temp2=C[j+(i+2)*lda];
- temp3=C[j+(i+3)*lda];
- temp_1=C[j+1+i*lda];
- temp1_1=C[j+1+(i+1)*lda];
- temp2_1=C[j+1+(i+2)*lda];
- temp3_1=C[j+1+(i+3)*lda];
- temp_2=C[j+2+i*lda];
- temp1_2=C[j+2+(i+1)*lda];
- temp2_2=C[j+2+(i+2)*lda];
- temp3_2=C[j+2+(i+3)*lda];
- temp_3=C[j+3+i*lda];
- temp1_3=C[j+3+(i+1)*lda];
- temp2_3=C[j+3+(i+2)*lda];
- temp3_3=C[j+3+(i+3)*lda];
- for (k=0;k<lda;k++)
- {
- tempB=B[j+k*lda]; // one B value per column, reused for four rows of A
- temp+=A[k+i*lda]*tempB;
- temp1+=A[k+(i+1)*lda]*tempB;
- temp2+=A[k+(i+2)*lda]*tempB;
- temp3+=A[k+(i+3)*lda]*tempB;
-
- tempB_1=B[j+1+k*lda];
- temp_1+=A[k+i*lda]*tempB_1;
- temp1_1+=A[k+(i+1)*lda]*tempB_1;
- temp2_1+=A[k+(i+2)*lda]*tempB_1;
- temp3_1+=A[k+(i+3)*lda]*tempB_1;
-
- tempB_2=B[j+2+k*lda];
- temp_2+=A[k+i*lda]*tempB_2;
- temp1_2+=A[k+(i+1)*lda]*tempB_2;
- temp2_2+=A[k+(i+2)*lda]*tempB_2;
- temp3_2+=A[k+(i+3)*lda]*tempB_2;
-
- tempB_3=B[j+3+k*lda];
- temp_3+=A[k+i*lda]*tempB_3;
- temp1_3+=A[k+(i+1)*lda]*tempB_3;
- temp2_3+=A[k+(i+2)*lda]*tempB_3;
- temp3_3+=A[k+(i+3)*lda]*tempB_3;
- }
- C[j+i*lda]=temp; // write the finished tile back
- C[j+(i+1)*lda]=temp1;
- C[j+(i+2)*lda]=temp2;
- C[j+(i+3)*lda]=temp3;
-
- C[j+1+i*lda]=temp_1;
- C[j+1+(i+1)*lda]=temp1_1;
- C[j+1+(i+2)*lda]=temp2_1;
- C[j+1+(i+3)*lda]=temp3_1;
-
- C[j+2+i*lda]=temp_2;
- C[j+2+(i+1)*lda]=temp1_2;
- C[j+2+(i+2)*lda]=temp2_2;
- C[j+2+(i+3)*lda]=temp3_2;
-
- C[j+3+i*lda]=temp_3;
- C[j+3+(i+1)*lda]=temp1_3;
- C[j+3+(i+2)*lda]=temp2_3;
- C[j+3+(i+3)*lda]=temp3_3;
-
- }
- }
- }
- else{ // last core: same tile loop, runs to lda/4*4 and reads B through B1
- for (i=space*coreid;i<lda/4*4;i+=4)
- {
- for(j=0;j<lda/4*4;j+=4)
- {
- temp=C[j+i*lda]; // load the 4x4 tile of C into the accumulators
- temp1=C[j+(i+1)*lda];
- temp2=C[j+(i+2)*lda];
- temp3=C[j+(i+3)*lda];
- temp_1=C[j+1+i*lda];
- temp1_1=C[j+1+(i+1)*lda];
- temp2_1=C[j+1+(i+2)*lda];
- temp3_1=C[j+1+(i+3)*lda];
- temp_2=C[j+2+i*lda];
- temp1_2=C[j+2+(i+1)*lda];
- temp2_2=C[j+2+(i+2)*lda];
- temp3_2=C[j+2+(i+3)*lda];
- temp_3=C[j+3+i*lda];
- temp1_3=C[j+3+(i+1)*lda];
- temp2_3=C[j+3+(i+2)*lda];
- temp3_3=C[j+3+(i+3)*lda];
- for (k=0;k<lda;k++)
- {
- tempB=B1[j+k*lda];
- temp+=A[k+i*lda]*tempB;
- temp1+=A[k+(i+1)*lda]*tempB;
- temp2+=A[k+(i+2)*lda]*tempB;
- temp3+=A[k+(i+3)*lda]*tempB;
-
- tempB_1=B1[j+1+k*lda];
- temp_1+=A[k+i*lda]*tempB_1;
- temp1_1+=A[k+(i+1)*lda]*tempB_1;
- temp2_1+=A[k+(i+2)*lda]*tempB_1;
- temp3_1+=A[k+(i+3)*lda]*tempB_1;
-
- tempB_2=B1[j+2+k*lda];
- temp_2+=A[k+i*lda]*tempB_2;
- temp1_2+=A[k+(i+1)*lda]*tempB_2;
- temp2_2+=A[k+(i+2)*lda]*tempB_2;
- temp3_2+=A[k+(i+3)*lda]*tempB_2;
-
- tempB_3=B1[j+3+k*lda];
- temp_3+=A[k+i*lda]*tempB_3;
- temp1_3+=A[k+(i+1)*lda]*tempB_3;
- temp2_3+=A[k+(i+2)*lda]*tempB_3;
- temp3_3+=A[k+(i+3)*lda]*tempB_3;
- }
- C[j+i*lda]=temp; // write the finished tile back
- C[j+(i+1)*lda]=temp1;
- C[j+(i+2)*lda]=temp2;
- C[j+(i+3)*lda]=temp3;
-
- C[j+1+i*lda]=temp_1;
- C[j+1+(i+1)*lda]=temp1_1;
- C[j+1+(i+2)*lda]=temp2_1;
- C[j+1+(i+3)*lda]=temp3_1;
-
- C[j+2+i*lda]=temp_2;
- C[j+2+(i+1)*lda]=temp1_2;
- C[j+2+(i+2)*lda]=temp2_2;
- C[j+2+(i+3)*lda]=temp3_2;
-
- C[j+3+i*lda]=temp_3;
- C[j+3+(i+1)*lda]=temp1_3;
- C[j+3+(i+2)*lda]=temp2_3;
- C[j+3+(i+3)*lda]=temp3_3;
-
- }
- }
- }
-
-
-
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-/*
- * Per-thread entry point: records this thread's core id and the core count,
- * times matmul() on the shared result buffer, then checks the result against
- * verify_data.
- *
- *   cid  id of the calling core, stored in coreid
- *   nc   number of cores, stored in ncores and passed to every barrier() call
- *
- * Does not return to its caller: the last statement is exit(0).
- */
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // the trailing barrier is inside the timed region
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
-
- size_t i;
- size_t space=n/ncores;
- size_t max= space*coreid+space;
- if (coreid!=ncores-1){
- for (i=space*coreid;i<max;i+=1)
- {
- x[i] = x[i] + y[i];
- }
- }
- else{
- for(i=space*coreid;i<n;i+=1)
- {
- x[i] = x[i] + y[i];
- }
- }
- /*
- size_t i;
- size_t space=n/ncores;
- size_t max= space*coreid+space;
- if (n%ncores!=0)
- {
- space=space+1;
- }
- for (i=space*coreid;i<max&& i<n;i+=1)
- {
- x[i] = x[i] + y[i];
- }
- */
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-///*
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-//*/
- /*
- int i, j, k, kk;
- if (coreid) {
- for ( i = 0; i < 16; i+=8 )
- {
- for ( j = 0; j < 32; j++ )
- {
- data_t temp0 = 0;
- data_t temp1 = 0;
- data_t temp2 = 0;
- data_t temp3 = 0;
- data_t temp4 = 0;
- data_t temp5 = 0;
- data_t temp6 = 0;
- data_t temp7 = 0;
- for ( kk = 0; kk < 32; kk+=8 )
- for ( k = kk; k < kk+8; k++ )
-// for ( k = 0; k < 32; k++ )
- {
- data_t tempA = A[j*32+k];
- temp0 += tempA * B[k*32 + i];
- temp1 += tempA * B[k*32 + i+1];
- temp2 += tempA * B[k*32 + i+2];
- temp3 += tempA * B[k*32 + i+3];
- temp4 += tempA * B[k*32 + i+4];
- temp5 += tempA * B[k*32 + i+5];
- temp6 += tempA * B[k*32 + i+6];
- temp7 += tempA * B[k*32 + i+7];
- }
- C[i+j*32] = temp0;
- C[i+j*32+1] = temp1;
- C[i+j*32+2] = temp2;
- C[i+j*32+3] = temp3;
- C[i+j*32+4] = temp4;
- C[i+j*32+5] = temp5;
- C[i+j*32+6] = temp6;
- C[i+j*32+7] = temp7;
- }
- }
- } else {
- for ( i = 16; i < 32; i+=8 )
- {
- for ( j = 0; j < 32; j++ )
- {
- data_t temp0 = 0;
- data_t temp1 = 0;
- data_t temp2 = 0;
- data_t temp3 = 0;
- data_t temp4 = 0;
- data_t temp5 = 0;
- data_t temp6 = 0;
- data_t temp7 = 0;
- for ( kk = 0; kk < 32; kk+=8 )
- for ( k = kk; k < kk+8; k++ )
- {
- data_t tempA = A[j*32+k];
- temp0 += tempA * B[k*32 + i];
- temp1 += tempA * B[k*32 + i+1];
- temp2 += tempA * B[k*32 + i+2];
- temp3 += tempA * B[k*32 + i+3];
- temp4 += tempA * B[k*32 + i+4];
- temp5 += tempA * B[k*32 + i+5];
- temp6 += tempA * B[k*32 + i+6];
- temp7 += tempA * B[k*32 + i+7];
- }
- C[i+j*32] = temp0;
- C[i+j*32+1] = temp1;
- C[i+j*32+2] = temp2;
- C[i+j*32+3] = temp3;
- C[i+j*32+4] = temp4;
- C[i+j*32+5] = temp5;
- C[i+j*32+6] = temp6;
- C[i+j*32+7] = temp7;
- }
-
- }
- }
- */
-}
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
- int i, j, k, ii, jj, kk;
- if (coreid) {
-// for ( ii = 0; ii < 32; ii+=IC )
- for ( kk = 0; kk < 32; kk+=16 )
- for ( j = 0; j < 16; j++ )
-// for ( j = 0; j < 16; j++ )
- {
- for ( i = 0; i < 32; i+=8 )
-// for ( i = ii; i < ii + IC && i < 32; i+=8 )
- {
- data_t temp0 = C[i+j*32];
- data_t temp1 = C[i+j*32+1];
- data_t temp2 = C[i+j*32+2];
- data_t temp3 = C[i+j*32+3];
- data_t temp4 = C[i+j*32+4];
- data_t temp5 = C[i+j*32+5];
- data_t temp6 = C[i+j*32+6];
- data_t temp7 = C[i+j*32+7];
- for ( k = kk; k < kk+16 && k < 32; k++ )
-// for ( k = 0; k < 32; k++ )
- {
- data_t tempA = A[j*32+k];
- temp0 += tempA * B[k*32 + i];
- temp1 += tempA * B[k*32 + i+1];
- temp2 += tempA * B[k*32 + i+2];
- temp3 += tempA * B[k*32 + i+3];
- temp4 += tempA * B[k*32 + i+4];
- temp5 += tempA * B[k*32 + i+5];
- temp6 += tempA * B[k*32 + i+6];
- temp7 += tempA * B[k*32 + i+7];
- }
- C[i+j*32] = temp0;
- C[i+j*32+1] = temp1;
- C[i+j*32+2] = temp2;
- C[i+j*32+3] = temp3;
- C[i+j*32+4] = temp4;
- C[i+j*32+5] = temp5;
- C[i+j*32+6] = temp6;
- C[i+j*32+7] = temp7;
- }
- }
- } else {
-// for ( ii = 0; ii < 32; ii+=IC )
- for ( kk = 0; kk < 32; kk+=16 )
- for ( j = 16; j < 32; j++ )
-// for ( j = 16; j < 32; j++ )
- {
- for ( i = 0; i < 32; i+=8 )
-// for ( i = ii; i < ii + IC && i < 32; i+=8 )
- {
- data_t temp0 = C[i+j*32];
- data_t temp1 = C[i+j*32+1];
- data_t temp2 = C[i+j*32+2];
- data_t temp3 = C[i+j*32+3];
- data_t temp4 = C[i+j*32+4];
- data_t temp5 = C[i+j*32+5];
- data_t temp6 = C[i+j*32+6];
- data_t temp7 = C[i+j*32+7];
- for ( k = kk; k < kk+16 && k < 32; k++ )
- {
- data_t tempA = A[j*32+k];
- temp0 += tempA * B[k*32 + i];
- temp1 += tempA * B[k*32 + i+1];
- temp2 += tempA * B[k*32 + i+2];
- temp3 += tempA * B[k*32 + i+3];
- temp4 += tempA * B[k*32 + i+4];
- temp5 += tempA * B[k*32 + i+5];
- temp6 += tempA * B[k*32 + i+6];
- temp7 += tempA * B[k*32 + i+7];
- }
- C[i+j*32] = temp0;
- C[i+j*32+1] = temp1;
- C[i+j*32+2] = temp2;
- C[i+j*32+3] = temp3;
- C[i+j*32+4] = temp4;
- C[i+j*32+5] = temp5;
- C[i+j*32+6] = temp6;
- C[i+j*32+7] = temp7;
- }
-
- }
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-///*
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-//*/
- /*
- int i, j, k, kk;
- if (coreid) {
- for ( i = 0; i < 16; i+=8 )
- {
- for ( j = 0; j < 32; j++ )
- {
- data_t temp0 = 0;
- data_t temp1 = 0;
- data_t temp2 = 0;
- data_t temp3 = 0;
- data_t temp4 = 0;
- data_t temp5 = 0;
- data_t temp6 = 0;
- data_t temp7 = 0;
- for ( kk = 0; kk < 32; kk+=8 )
- for ( k = kk; k < kk+8; k++ )
-// for ( k = 0; k < 32; k++ )
- {
- data_t tempA = A[j*32+k];
- temp0 += tempA * B[k*32 + i];
- temp1 += tempA * B[k*32 + i+1];
- temp2 += tempA * B[k*32 + i+2];
- temp3 += tempA * B[k*32 + i+3];
- temp4 += tempA * B[k*32 + i+4];
- temp5 += tempA * B[k*32 + i+5];
- temp6 += tempA * B[k*32 + i+6];
- temp7 += tempA * B[k*32 + i+7];
- }
- C[i+j*32] = temp0;
- C[i+j*32+1] = temp1;
- C[i+j*32+2] = temp2;
- C[i+j*32+3] = temp3;
- C[i+j*32+4] = temp4;
- C[i+j*32+5] = temp5;
- C[i+j*32+6] = temp6;
- C[i+j*32+7] = temp7;
- }
- }
- } else {
- for ( i = 16; i < 32; i+=8 )
- {
- for ( j = 0; j < 32; j++ )
- {
- data_t temp0 = 0;
- data_t temp1 = 0;
- data_t temp2 = 0;
- data_t temp3 = 0;
- data_t temp4 = 0;
- data_t temp5 = 0;
- data_t temp6 = 0;
- data_t temp7 = 0;
- for ( kk = 0; kk < 32; kk+=8 )
- for ( k = kk; k < kk+8; k++ )
- {
- data_t tempA = A[j*32+k];
- temp0 += tempA * B[k*32 + i];
- temp1 += tempA * B[k*32 + i+1];
- temp2 += tempA * B[k*32 + i+2];
- temp3 += tempA * B[k*32 + i+3];
- temp4 += tempA * B[k*32 + i+4];
- temp5 += tempA * B[k*32 + i+5];
- temp6 += tempA * B[k*32 + i+6];
- temp7 += tempA * B[k*32 + i+7];
- }
- C[i+j*32] = temp0;
- C[i+j*32+1] = temp1;
- C[i+j*32+2] = temp2;
- C[i+j*32+3] = temp3;
- C[i+j*32+4] = temp4;
- C[i+j*32+5] = temp5;
- C[i+j*32+6] = temp6;
- C[i+j*32+7] = temp7;
- }
-
- }
- }
- */
-}
-
-
-#define KC 16
-#define IC 16
-#define JC 16
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
- int i, j, k, ii, jj, kk;
- if (coreid) {
-// for ( ii = 0; ii < 32; ii+=IC )
- for ( jj = 0; jj < 16; jj+=16 )
- for ( kk = 0; kk < 32; kk+=16 )
- for ( j = jj; j < jj+16 && j < 16; j++ )
-// for ( j = 0; j < 16; j++ )
- {
- for ( i = 0; i < 32; i+=8 )
-// for ( i = ii; i < ii + IC && i < 32; i+=8 )
- {
- data_t temp0 = C[i+j*32];
- data_t temp1 = C[i+j*32+1];
- data_t temp2 = C[i+j*32+2];
- data_t temp3 = C[i+j*32+3];
- data_t temp4 = C[i+j*32+4];
- data_t temp5 = C[i+j*32+5];
- data_t temp6 = C[i+j*32+6];
- data_t temp7 = C[i+j*32+7];
- for ( k = kk; k < kk+16 && k < 32; k++ )
-// for ( k = 0; k < 32; k++ )
- {
- data_t tempA = A[j*32+k];
- temp0 += tempA * B[k*32 + i];
- temp1 += tempA * B[k*32 + i+1];
- temp2 += tempA * B[k*32 + i+2];
- temp3 += tempA * B[k*32 + i+3];
- temp4 += tempA * B[k*32 + i+4];
- temp5 += tempA * B[k*32 + i+5];
- temp6 += tempA * B[k*32 + i+6];
- temp7 += tempA * B[k*32 + i+7];
- }
- C[i+j*32] = temp0;
- C[i+j*32+1] = temp1;
- C[i+j*32+2] = temp2;
- C[i+j*32+3] = temp3;
- C[i+j*32+4] = temp4;
- C[i+j*32+5] = temp5;
- C[i+j*32+6] = temp6;
- C[i+j*32+7] = temp7;
- }
- }
- } else {
-// for ( ii = 0; ii < 32; ii+=IC )
- for ( jj = 16; jj < 32; jj+= 16 ) {
- for ( kk = 16; kk < 32; kk+=16 )
- for ( j = jj; j < jj+16 && j < 32; j++ )
-// for ( j = 16; j < 32; j++ )
- {
- for ( i = 0; i < 32; i+=8 )
-// for ( i = ii; i < ii + IC && i < 32; i+=8 )
- {
- data_t temp0 = C[i+j*32];
- data_t temp1 = C[i+j*32+1];
- data_t temp2 = C[i+j*32+2];
- data_t temp3 = C[i+j*32+3];
- data_t temp4 = C[i+j*32+4];
- data_t temp5 = C[i+j*32+5];
- data_t temp6 = C[i+j*32+6];
- data_t temp7 = C[i+j*32+7];
- for ( k = kk; k < kk+16 && k < 32; k++ )
- {
- data_t tempA = A[j*32+k];
- temp0 += tempA * B[k*32 + i];
- temp1 += tempA * B[k*32 + i+1];
- temp2 += tempA * B[k*32 + i+2];
- temp3 += tempA * B[k*32 + i+3];
- temp4 += tempA * B[k*32 + i+4];
- temp5 += tempA * B[k*32 + i+5];
- temp6 += tempA * B[k*32 + i+6];
- temp7 += tempA * B[k*32 + i+7];
- }
- C[i+j*32] = temp0;
- C[i+j*32+1] = temp1;
- C[i+j*32+2] = temp2;
- C[i+j*32+3] = temp3;
- C[i+j*32+4] = temp4;
- C[i+j*32+5] = temp5;
- C[i+j*32+6] = temp6;
- C[i+j*32+7] = temp7;
- }
-
- }
- for ( kk = 0; kk < 16; kk+=16 )
- for ( j = jj; j < jj+16 && j < 32; j++ )
-// for ( j = 16; j < 32; j++ )
- {
- for ( i = 0; i < 32; i+=8 )
-// for ( i = ii; i < ii + IC && i < 32; i+=8 )
- {
- data_t temp0 = C[i+j*32];
- data_t temp1 = C[i+j*32+1];
- data_t temp2 = C[i+j*32+2];
- data_t temp3 = C[i+j*32+3];
- data_t temp4 = C[i+j*32+4];
- data_t temp5 = C[i+j*32+5];
- data_t temp6 = C[i+j*32+6];
- data_t temp7 = C[i+j*32+7];
- for ( k = kk; k < kk+16 && k < 32; k++ )
- {
- data_t tempA = A[j*32+k];
- temp0 += tempA * B[k*32 + i];
- temp1 += tempA * B[k*32 + i+1];
- temp2 += tempA * B[k*32 + i+2];
- temp3 += tempA * B[k*32 + i+3];
- temp4 += tempA * B[k*32 + i+4];
- temp5 += tempA * B[k*32 + i+5];
- temp6 += tempA * B[k*32 + i+6];
- temp7 += tempA * B[k*32 + i+7];
- }
- C[i+j*32] = temp0;
- C[i+j*32+1] = temp1;
- C[i+j*32+2] = temp2;
- C[i+j*32+3] = temp3;
- C[i+j*32+4] = temp4;
- C[i+j*32+5] = temp5;
- C[i+j*32+6] = temp6;
- C[i+j*32+7] = temp7;
- }
-
- }
- }
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************\r
-// Vector-vector add benchmark\r
-//--------------------------------------------------------------------------\r
-// Author : Andrew Waterman\r
-// TA : Christopher Celio\r
-// Student : \r
-//\r
-// This benchmark adds two vectors and writes the results to a\r
-// third vector. The input data (and reference data) should be\r
-// generated using the vvadd_gendata.pl perl script and dumped\r
-// to a file named dataset.h \r
-\r
-// to print out arrays, etc.\r
-//#define DEBUG\r
-\r
-//--------------------------------------------------------------------------\r
-// Includes \r
-\r
-#include <string.h>\r
-#include <stdlib.h>\r
-#include <stdio.h>\r
-\r
-\r
-//--------------------------------------------------------------------------\r
-// Input/Reference Data\r
-\r
-typedef float data_t;\r
-#include "dataset.h"\r
- \r
- \r
-//--------------------------------------------------------------------------\r
-// Basic Utilities and Multi-thread Support\r
-\r
-__thread unsigned long coreid;\r
-unsigned long ncores;\r
-\r
-#include "util.h"\r
- \r
-#define stringify_1(s) #s\r
-#define stringify(s) stringify_1(s)\r
-#define stats(code) do { \\r
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \\r
- code; \\r
- _c += rdcycle(), _i += rdinstret(); \\r
- if (coreid == 0) \\r
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \\r
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \\r
- } while(0)\r
- \r
-\r
-//--------------------------------------------------------------------------\r
-// Helper functions\r
- \r
-void printArrayMT( char name[], int n, data_t arr[] )\r
-{\r
- int i;\r
- if (coreid != 0)\r
- return;\r
-\r
- printf( " %10s :", name );\r
- for ( i = 0; i < n; i++ )\r
- printf( " %4ld ", (long) arr[i] );\r
- printf( "\n" );\r
-}\r
- \r
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)\r
-{\r
- if (coreid != 0)\r
- return;\r
-\r
- size_t i;\r
- for (i = 0; i < n; i++)\r
- {\r
- if (test[i] != correct[i])\r
- {\r
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", \r
- i, (long) test[i], i, (long)correct[i]);\r
- exit(-1);\r
- }\r
- }\r
- \r
- return;\r
-}\r
- \r
-//--------------------------------------------------------------------------\r
-// vvadd function\r
-\r
-//perform in-place vvadd\r
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)\r
-{\r
- size_t i;\r
-\r
- // interleave accesses\r
- for (i = coreid; i < n; i+=ncores)\r
- {\r
- x[i] = x[i] + y[i];\r
- }\r
-}\r
-\r
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)\r
-{\r
- // ***************************** //\r
- // **** ADD YOUR CODE HERE ***** //\r
- // ***************************** //\r
- size_t i;\r
-\r
- for (i = coreid*(n/ncores); i<(coreid+1)*n/ncores; i++)\r
- {\r
- x[i] = x[i] + y[i];\r
- }\r
-}\r
-\r
-//--------------------------------------------------------------------------\r
-// Main\r
-//\r
-// all threads start executing thread_entry(). Use their "coreid" to\r
-// differentiate between threads (each thread is running on a separate core).\r
- \r
-void thread_entry(int cid, int nc)\r
-{\r
- coreid = cid;\r
- ncores = nc;\r
-\r
- // static allocates data in the binary, which is visible to both threads\r
- static data_t results_data[DATA_SIZE];\r
- \r
- // because we're going to perform an in-place vvadd (and we're going to run\r
- // it a couple of times) let's copy the input data to a temporary results\r
- // array\r
- \r
- size_t i;\r
- if (coreid == 0)\r
- {\r
- for (i = 0; i < DATA_SIZE; i++)\r
- results_data[i] = input1_data[i];\r
- }\r
-\r
-\r
- // Execute the provided, terrible vvadd\r
- barrier(nc);\r
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));\r
- \r
- \r
- // verify\r
- verifyMT(DATA_SIZE, results_data, verify_data);\r
- \r
- // reset results from the first trial\r
- if (coreid == 0) \r
- {\r
- for (i=0; i < DATA_SIZE; i++)\r
- results_data[i] = input1_data[i];\r
- }\r
- barrier(nc);\r
- \r
- \r
- // Execute your faster vvadd\r
- barrier(nc);\r
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));\r
-\r
-#ifdef DEBUG\r
- printArrayMT("results: ", DATA_SIZE, results_data);\r
- printArrayMT("verify : ", DATA_SIZE, verify_data);\r
-#endif\r
- \r
- // verify\r
- verifyMT(DATA_SIZE, results_data, verify_data);\r
- barrier(nc);\r
-\r
- exit(0);\r
-}\r
-\r
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+//--------------------------------------------------------------------------
+// Two-core matmul for a 32x32 problem.
+//
+// The kernel is specialised for MATMUL_DIM x MATMUL_DIM operands; lda is the
+// element stride between consecutive rows of A, B and C.
+enum { MATMUL_DIM = 32, MATMUL_HALF = MATMUL_DIM / 2 };
+
+// Inner product of two contiguous MATMUL_DIM-element vectors.
+//
+// The loop is unrolled by 8, but the products are still added to the
+// accumulator one at a time in increasing index order, so the floating-point
+// result is the same as a plain k = 0..MATMUL_DIM-1 accumulation loop.
+static data_t matmul_dot32(const data_t *a, const data_t *b)
+{
+    data_t acc = 0;
+    int k;
+
+    for (k = 0; k < MATMUL_DIM; k += 8) {
+        acc += a[k]     * b[k];
+        acc += a[k + 1] * b[k + 1];
+        acc += a[k + 2] * b[k + 2];
+        acc += a[k + 3] * b[k + 3];
+        acc += a[k + 4] * b[k + 4];
+        acc += a[k + 5] * b[k + 5];
+        acc += a[k + 6] * b[k + 6];
+        acc += a[k + 7] * b[k + 7];
+    }
+    return acc;
+}
+
+// Computes C[i + j*lda] = sum over k of A[j*lda + k] * B[k*lda + i] for the
+// rows j owned by the calling core:
+//
+//   coreid == 0               -> rows [0, MATMUL_HALF)
+//   coreid == 1               -> rows [MATMUL_HALF, MATMUL_DIM)
+//   ncores == 1 (coreid == 0) -> all rows
+//   any other core            -> no rows when ncores != 1
+//
+// Parameters:
+//   coreid - id of the calling core
+//   ncores - number of cores taking part
+//   lda    - row stride (in elements) of A, B and C
+//   A, B   - input matrices, only read
+//   C      - output matrix; every owned element is overwritten, so C does not
+//            need to be cleared beforehand
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+    // Core 0 starts at the first row, every other core at the midpoint.
+    // Core 1 (or any caller when running single-core) runs to the last row;
+    // everyone else stops at the midpoint, which leaves cores >= 2 with an
+    // empty range in a multi-core run.
+    const int j_begin = (coreid == 0) ? 0 : MATMUL_HALF;
+    const int j_end = (coreid == 1 || ncores == 1) ? MATMUL_DIM : MATMUL_HALF;
+    data_t B_col[MATMUL_DIM];
+    int i, j, k;
+
+    if (j_begin >= j_end)
+        return;
+
+    for (i = 0; i < MATMUL_DIM; i++) {
+        // Gather column i of B into a contiguous local buffer so the inner
+        // product walks both operands with unit stride. Only one column is
+        // needed at a time, so a full transposed copy of B is unnecessary.
+        for (k = 0; k < MATMUL_DIM; k++)
+            B_col[k] = B[i + k*lda];
+
+        for (j = j_begin; j < j_end; j++)
+            C[i + j*lda] = matmul_dot32(A + j*lda, B_col);
+    }
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-void __attribute__((noinline)) matmul_MI_transpose(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
- data_t B_trans[32*32];
- data_t acc_temp0, acc_temp1;
- data_t *A_j, *B_i;
- data_t *A_j_k, *B_i_k;
- int z;
-
- //for (i = 0; i < 32; i++) {
- // for (j = 0; j < 32; j++) {
- // B_trans[i*lda+j] = B[i+j*lda];
- // }
- //}
-
- if (coreid == 0) {
- for (i = 0; i < 32; i++) {
- B_i = B_trans+i*32;
- for (z = 0; z < 32; z++) {
- *(B_i+z) = B[i+z*32];
- }
- for (j = 0; j < 16; j+=2) {
- A_j = A+j*lda;
- acc_temp0 = 0;
- for (k = 0; k < 32; k+=8) {
- A_j_k = A_j+k;
- B_i_k = B_i+k;
- acc_temp0 += *(A_j_k) * *(B_i_k);
- acc_temp0 += *(A_j_k + 1) * *(B_i_k + 1);
- acc_temp0 += *(A_j_k + 2) * *(B_i_k + 2);
- acc_temp0 += *(A_j_k + 3) * *(B_i_k + 3);
- acc_temp0 += *(A_j_k + 4) * *(B_i_k + 4);
- acc_temp0 += *(A_j_k + 5) * *(B_i_k + 5);
- acc_temp0 += *(A_j_k + 6) * *(B_i_k + 6);
- acc_temp0 += *(A_j_k + 7) * *(B_i_k + 7);
- }
- A_j += 32;
-
- acc_temp1 = 0;
- for (k = 0; k < 32; k+=8) {
- acc_temp1 += *(A_j+k) * *(B_i+k);
- acc_temp1 += *(A_j+k + 1) * *(B_i+k + 1);
- acc_temp1 += *(A_j+k + 2) * *(B_i+k + 2);
- acc_temp1 += *(A_j+k + 3) * *(B_i+k + 3);
- acc_temp1 += *(A_j+k + 4) * *(B_i+k + 4);
- acc_temp1 += *(A_j+k + 5) * *(B_i+k + 5);
- acc_temp1 += *(A_j+k + 6) * *(B_i+k + 6);
- acc_temp1 += *(A_j+k + 7) * *(B_i+k + 7);
- }
-
- C[i + j*lda] = acc_temp0;
- C[i + (j+1)*lda] = acc_temp1;
- }
- }
- } else if (coreid == 1) {
- for (i = 0; i < 32; i++) {
- B_i = B_trans+i*32;
- for (z = 0; z < 32; z++) {
- *(B_i+z) = B[i+z*32];
- }
- for (j = 16; j < 32; j+=2) {
- A_j = A+j*lda;
- acc_temp0 = 0;
- for (k = 0; k < 32; k+=8) {
- acc_temp0 += *(A_j+k) * *(B_i+k);
- acc_temp0 += *(A_j+k + 1) * *(B_i+k + 1);
- acc_temp0 += *(A_j+k + 2) * *(B_i+k + 2);
- acc_temp0 += *(A_j+k + 3) * *(B_i+k + 3);
- acc_temp0 += *(A_j+k + 4) * *(B_i+k + 4);
- acc_temp0 += *(A_j+k + 5) * *(B_i+k + 5);
- acc_temp0 += *(A_j+k + 6) * *(B_i+k + 6);
- acc_temp0 += *(A_j+k + 7) * *(B_i+k + 7);
- }
- A_j += 32;
-
- acc_temp1 = 0;
- for (k = 0; k < 32; k+=8) {
- acc_temp1 += *(A_j+k) * *(B_i+k);
- acc_temp1 += *(A_j+k + 1) * *(B_i+k + 1);
- acc_temp1 += *(A_j+k + 2) * *(B_i+k + 2);
- acc_temp1 += *(A_j+k + 3) * *(B_i+k + 3);
- acc_temp1 += *(A_j+k + 4) * *(B_i+k + 4);
- acc_temp1 += *(A_j+k + 5) * *(B_i+k + 5);
- acc_temp1 += *(A_j+k + 6) * *(B_i+k + 6);
- acc_temp1 += *(A_j+k + 7) * *(B_i+k + 7);
- }
- C[i + j*lda] = acc_temp0;
- C[i + (j+1)*lda] = acc_temp1;
- }
- }
- }
-}
-
-void __attribute__((noinline)) matmul_MI(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
- data_t acc_temp;
- data_t *A_j, *B_i;
- int j_start = coreid*16;
- int j_end = (coreid*16)+16;
- if (coreid == 0) {
- for ( i = 0; i < 32; i++ ) {
- B_i = B + i;
- for ( j = j_start; j < j_end; j++ )
- {
- acc_temp = 0;
- A_j = A + j*32;
- for ( k = 0; k < 32; k++ )
- {
- acc_temp += *(A_j + k) * *(B_i + k*32);
- }
- C[i + j*32] = acc_temp;
- }
- }
- } else if (coreid == 1) {
- for ( i = 16; i < 32; i++ ) {
- B_i = B + i;
- for ( j = j_start; j < j_end; j++ )
- {
- acc_temp = 0;
- A_j = A + j*32;
- for ( k = 0; k < 32; k+=4 )
- {
- acc_temp += *(A_j + k) * *(B_i + k*32);
- acc_temp += *(A_j + k + 1) * *(B_i + (k+1)*32);
- acc_temp += *(A_j + k + 2) * *(B_i + (k+2)*32);
- acc_temp += *(A_j + k + 3) * *(B_i + (k+3)*32);
- }
- C[i + j*32] = acc_temp;
- }
- }
- for ( i = 0; i < 16; i++ ) {
- B_i = B + i;
- for ( j = j_start; j < j_end; j++ )
- {
- acc_temp = 0;
- A_j = A + j*32;
- for ( k = 0; k < 32; k+=4 )
- {
- acc_temp += *(A_j + k) * *(B_i + k*32);
- acc_temp += *(A_j + k + 1) * *(B_i + (k+1)*32);
- acc_temp += *(A_j + k + 2) * *(B_i + (k+2)*32);
- acc_temp += *(A_j + k + 3) * *(B_i + (k+3)*32);
- }
- C[i + j*32] = acc_temp;
- }
- }
-
- }
-}
-
-void __attribute__((noinline)) matmul_MSI(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
- data_t acc_temp;
- data_t *A_j, *B_i;
- int j_start = coreid*16;
- int j_end = (coreid*16)+16;
- for ( i = 0; i < 32; i++ ) {
- B_i = B + i;
- for ( j = j_start; j < j_end; j++ )
- {
- acc_temp = 0;
- A_j = A + j*32;
- for ( k = 0; k < 32; k++ )
- {
- acc_temp += *(A_j + k) * *(B_i + k*32);
- }
- C[i + j*32] = acc_temp;
- }
- }
-}
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
- // ENABLE_SHARING = false is MI
- // ENABLE_SHARING = true is MSI
- matmul_MI_transpose(lda, A, B, C);
- //matmul_MSI(lda, A, B, C);
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// //verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-void __attribute__((noinline)) matmul_MI_transpose(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
- data_t B_trans[32*32];
- data_t acc_temp0, acc_temp1;
- data_t *A_j, *B_i;
- data_t *A_j_k, *B_i_k;
- int z;
-
- //for (i = 0; i < 32; i++) {
- // for (j = 0; j < 32; j++) {
- // B_trans[i*lda+j] = B[i+j*lda];
- // }
- //}
-
- if (coreid == 0) {
- for (i = 0; i < 32; i++) {
- B_i = B_trans+i*32;
- for (z = 0; z < 32; z++) {
- *(B_i+z) = B[i+z*32];
- }
- for (j = 0; j < 16; j+=2) {
- A_j = A+j*lda;
- acc_temp0 = 0;
- for (k = 0; k < 32; k+=8) {
- A_j_k = A_j+k;
- B_i_k = B_i+k;
- acc_temp0 += *(A_j_k) * *(B_i_k);
- acc_temp0 += *(A_j_k + 1) * *(B_i_k + 1);
- acc_temp0 += *(A_j_k + 2) * *(B_i_k + 2);
- acc_temp0 += *(A_j_k + 3) * *(B_i_k + 3);
- acc_temp0 += *(A_j_k + 4) * *(B_i_k + 4);
- acc_temp0 += *(A_j_k + 5) * *(B_i_k + 5);
- acc_temp0 += *(A_j_k + 6) * *(B_i_k + 6);
- acc_temp0 += *(A_j_k + 7) * *(B_i_k + 7);
- }
- A_j += 32;
-
- acc_temp1 = 0;
- for (k = 0; k < 32; k+=8) {
- acc_temp1 += *(A_j+k) * *(B_i+k);
- acc_temp1 += *(A_j+k + 1) * *(B_i+k + 1);
- acc_temp1 += *(A_j+k + 2) * *(B_i+k + 2);
- acc_temp1 += *(A_j+k + 3) * *(B_i+k + 3);
- acc_temp1 += *(A_j+k + 4) * *(B_i+k + 4);
- acc_temp1 += *(A_j+k + 5) * *(B_i+k + 5);
- acc_temp1 += *(A_j+k + 6) * *(B_i+k + 6);
- acc_temp1 += *(A_j+k + 7) * *(B_i+k + 7);
- }
-
- C[i + j*lda] = acc_temp0;
- C[i + (j+1)*lda] = acc_temp1;
- }
- }
- } else if (coreid == 1) {
- for (i = 0; i < 32; i++) {
- B_i = B_trans+i*32;
- for (z = 0; z < 32; z++) {
- *(B_i+z) = B[i+z*32];
- }
- for (j = 16; j < 32; j+=2) {
- A_j = A+j*lda;
- acc_temp0 = 0;
- for (k = 0; k < 32; k+=8) {
- acc_temp0 += *(A_j+k) * *(B_i+k);
- acc_temp0 += *(A_j+k + 1) * *(B_i+k + 1);
- acc_temp0 += *(A_j+k + 2) * *(B_i+k + 2);
- acc_temp0 += *(A_j+k + 3) * *(B_i+k + 3);
- acc_temp0 += *(A_j+k + 4) * *(B_i+k + 4);
- acc_temp0 += *(A_j+k + 5) * *(B_i+k + 5);
- acc_temp0 += *(A_j+k + 6) * *(B_i+k + 6);
- acc_temp0 += *(A_j+k + 7) * *(B_i+k + 7);
- }
- A_j += 32;
-
- acc_temp1 = 0;
- for (k = 0; k < 32; k+=8) {
- acc_temp1 += *(A_j+k) * *(B_i+k);
- acc_temp1 += *(A_j+k + 1) * *(B_i+k + 1);
- acc_temp1 += *(A_j+k + 2) * *(B_i+k + 2);
- acc_temp1 += *(A_j+k + 3) * *(B_i+k + 3);
- acc_temp1 += *(A_j+k + 4) * *(B_i+k + 4);
- acc_temp1 += *(A_j+k + 5) * *(B_i+k + 5);
- acc_temp1 += *(A_j+k + 6) * *(B_i+k + 6);
- acc_temp1 += *(A_j+k + 7) * *(B_i+k + 7);
- }
- C[i + j*lda] = acc_temp0;
- C[i + (j+1)*lda] = acc_temp1;
- }
- }
- }
-}
-
-void __attribute__((noinline)) matmul_MI(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
- data_t acc_temp;
- data_t *A_j, *B_i;
- int j_start = coreid*16;
- int j_end = (coreid*16)+16;
- if (coreid == 0) {
- for ( i = 0; i < 32; i++ ) {
- B_i = B + i;
- for ( j = j_start; j < j_end; j++ )
- {
- acc_temp = 0;
- A_j = A + j*32;
- for ( k = 0; k < 32; k++ )
- {
- acc_temp += *(A_j + k) * *(B_i + k*32);
- }
- C[i + j*32] = acc_temp;
- }
- }
- } else if (coreid == 1) {
- for ( i = 16; i < 32; i++ ) {
- B_i = B + i;
- for ( j = j_start; j < j_end; j++ )
- {
- acc_temp = 0;
- A_j = A + j*32;
- for ( k = 0; k < 32; k+=4 )
- {
- acc_temp += *(A_j + k) * *(B_i + k*32);
- acc_temp += *(A_j + k + 1) * *(B_i + (k+1)*32);
- acc_temp += *(A_j + k + 2) * *(B_i + (k+2)*32);
- acc_temp += *(A_j + k + 3) * *(B_i + (k+3)*32);
- }
- C[i + j*32] = acc_temp;
- }
- }
- for ( i = 0; i < 16; i++ ) {
- B_i = B + i;
- for ( j = j_start; j < j_end; j++ )
- {
- acc_temp = 0;
- A_j = A + j*32;
- for ( k = 0; k < 32; k+=4 )
- {
- acc_temp += *(A_j + k) * *(B_i + k*32);
- acc_temp += *(A_j + k + 1) * *(B_i + (k+1)*32);
- acc_temp += *(A_j + k + 2) * *(B_i + (k+2)*32);
- acc_temp += *(A_j + k + 3) * *(B_i + (k+3)*32);
- }
- C[i + j*32] = acc_temp;
- }
- }
-
- }
-}
-
-void __attribute__((noinline)) matmul_MSI(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
- data_t acc_temp;
- data_t *A_j, *B_i;
- int j_start = coreid*16;
- int j_end = (coreid*16)+16;
- for ( i = 0; i < 32; i++ ) {
- B_i = B + i;
- for ( j = j_start; j < j_end; j++ )
- {
- acc_temp = 0;
- A_j = A + j*32;
- for ( k = 0; k < 32; k++ )
- {
- acc_temp += *(A_j + k) * *(B_i + k*32);
- }
- C[i + j*32] = acc_temp;
- }
- }
-}
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
- // ENABLE_SHARING = false is MI
- // ENABLE_SHARING = true is MSI
- matmul_MI_transpose(lda, A, B, C);
- //matmul_MSI(lda, A, B, C);
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// //verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-//
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
-
- size_t i;
- for (i = 0; i < (n/ncores); i+= 1)
- {
- size_t ind = (n/ncores)*coreid+i;
- x[ind] = x[ind] + y[ind];
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-void __attribute__((noinline)) matmul_MI_transpose(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
- data_t B_trans[32*32];
- data_t acc_temp0, acc_temp1;
- data_t *A_j, *B_i;
- data_t *A_j_k, *B_i_k;
- int z;
-
- //for (i = 0; i < 32; i++) {
- // for (j = 0; j < 32; j++) {
- // B_trans[i*lda+j] = B[i+j*lda];
- // }
- //}
-
- if (coreid == 0) {
- for (i = 0; i < 32; i++) {
- B_i = B_trans+i*32;
- for (z = 0; z < 32; z++) {
- *(B_i+z) = B[i+z*32];
- }
- for (j = 0; j < 16; j+=2) {
- A_j = A+j*lda;
- acc_temp0 = 0;
- for (k = 0; k < 32; k+=8) {
- A_j_k = A_j+k;
- B_i_k = B_i+k;
- acc_temp0 += *(A_j_k) * *(B_i_k);
- acc_temp0 += *(A_j_k + 1) * *(B_i_k + 1);
- acc_temp0 += *(A_j_k + 2) * *(B_i_k + 2);
- acc_temp0 += *(A_j_k + 3) * *(B_i_k + 3);
- acc_temp0 += *(A_j_k + 4) * *(B_i_k + 4);
- acc_temp0 += *(A_j_k + 5) * *(B_i_k + 5);
- acc_temp0 += *(A_j_k + 6) * *(B_i_k + 6);
- acc_temp0 += *(A_j_k + 7) * *(B_i_k + 7);
- }
- A_j += 32;
-
- acc_temp1 = 0;
- for (k = 0; k < 32; k+=8) {
- acc_temp1 += *(A_j+k) * *(B_i+k);
- acc_temp1 += *(A_j+k + 1) * *(B_i+k + 1);
- acc_temp1 += *(A_j+k + 2) * *(B_i+k + 2);
- acc_temp1 += *(A_j+k + 3) * *(B_i+k + 3);
- acc_temp1 += *(A_j+k + 4) * *(B_i+k + 4);
- acc_temp1 += *(A_j+k + 5) * *(B_i+k + 5);
- acc_temp1 += *(A_j+k + 6) * *(B_i+k + 6);
- acc_temp1 += *(A_j+k + 7) * *(B_i+k + 7);
- }
-
- C[i + j*lda] = acc_temp0;
- C[i + (j+1)*lda] = acc_temp1;
- }
- }
- } else if (coreid == 1) {
- for (i = 0; i < 32; i++) {
- B_i = B_trans+i*32;
- for (z = 0; z < 32; z++) {
- *(B_i+z) = B[i+z*32];
- }
- for (j = 16; j < 32; j+=2) {
- A_j = A+j*lda;
- acc_temp0 = 0;
- for (k = 0; k < 32; k+=8) {
- acc_temp0 += *(A_j+k) * *(B_i+k);
- acc_temp0 += *(A_j+k + 1) * *(B_i+k + 1);
- acc_temp0 += *(A_j+k + 2) * *(B_i+k + 2);
- acc_temp0 += *(A_j+k + 3) * *(B_i+k + 3);
- acc_temp0 += *(A_j+k + 4) * *(B_i+k + 4);
- acc_temp0 += *(A_j+k + 5) * *(B_i+k + 5);
- acc_temp0 += *(A_j+k + 6) * *(B_i+k + 6);
- acc_temp0 += *(A_j+k + 7) * *(B_i+k + 7);
- }
- A_j += 32;
-
- acc_temp1 = 0;
- for (k = 0; k < 32; k+=8) {
- acc_temp1 += *(A_j+k) * *(B_i+k);
- acc_temp1 += *(A_j+k + 1) * *(B_i+k + 1);
- acc_temp1 += *(A_j+k + 2) * *(B_i+k + 2);
- acc_temp1 += *(A_j+k + 3) * *(B_i+k + 3);
- acc_temp1 += *(A_j+k + 4) * *(B_i+k + 4);
- acc_temp1 += *(A_j+k + 5) * *(B_i+k + 5);
- acc_temp1 += *(A_j+k + 6) * *(B_i+k + 6);
- acc_temp1 += *(A_j+k + 7) * *(B_i+k + 7);
- }
- C[i + j*lda] = acc_temp0;
- C[i + (j+1)*lda] = acc_temp1;
- }
- }
- }
-}
-
-void __attribute__((noinline)) matmul_MI(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
- data_t acc_temp;
- data_t *A_j, *B_i;
- int j_start = coreid*16;
- int j_end = (coreid*16)+16;
- if (coreid == 0) {
- for ( i = 0; i < 32; i++ ) {
- B_i = B + i;
- for ( j = j_start; j < j_end; j++ )
- {
- acc_temp = 0;
- A_j = A + j*32;
- for ( k = 0; k < 32; k++ )
- {
- acc_temp += *(A_j + k) * *(B_i + k*32);
- }
- C[i + j*32] = acc_temp;
- }
- }
- } else if (coreid == 1) {
- for ( i = 16; i < 32; i++ ) {
- B_i = B + i;
- for ( j = j_start; j < j_end; j++ )
- {
- acc_temp = 0;
- A_j = A + j*32;
- for ( k = 0; k < 32; k+=4 )
- {
- acc_temp += *(A_j + k) * *(B_i + k*32);
- acc_temp += *(A_j + k + 1) * *(B_i + (k+1)*32);
- acc_temp += *(A_j + k + 2) * *(B_i + (k+2)*32);
- acc_temp += *(A_j + k + 3) * *(B_i + (k+3)*32);
- }
- C[i + j*32] = acc_temp;
- }
- }
- for ( i = 0; i < 16; i++ ) {
- B_i = B + i;
- for ( j = j_start; j < j_end; j++ )
- {
- acc_temp = 0;
- A_j = A + j*32;
- for ( k = 0; k < 32; k+=4 )
- {
- acc_temp += *(A_j + k) * *(B_i + k*32);
- acc_temp += *(A_j + k + 1) * *(B_i + (k+1)*32);
- acc_temp += *(A_j + k + 2) * *(B_i + (k+2)*32);
- acc_temp += *(A_j + k + 3) * *(B_i + (k+3)*32);
- }
- C[i + j*32] = acc_temp;
- }
- }
-
- }
-}
-
-void __attribute__((noinline)) matmul_MSI(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
- data_t acc_temp;
- data_t *A_j, *B_i;
- int j_start = coreid*16;
- int j_end = (coreid*16)+16;
- for ( i = 0; i < 32; i++ ) {
- B_i = B + i;
- for ( j = j_start; j < j_end; j++ )
- {
- acc_temp = 0;
- A_j = A + j*32;
- for ( k = 0; k < 32; k++ )
- {
- acc_temp += *(A_j + k) * *(B_i + k*32);
- }
- C[i + j*32] = acc_temp;
- }
- }
-}
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
- // ENABLE_SHARING = false is MI
- // ENABLE_SHARING = true is MSI
- matmul_MI_transpose(lda, A, B, C);
- //matmul_MSI(lda, A, B, C);
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// //verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-void __attribute__((noinline)) matmul_MI_transpose(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
- data_t B_trans[32*32];
- data_t acc_temp0, acc_temp1;
- data_t *A_j, *B_i;
- data_t *A_j_k, *B_i_k;
- int z;
-
- //for (i = 0; i < 32; i++) {
- // for (j = 0; j < 32; j++) {
- // B_trans[i*lda+j] = B[i+j*lda];
- // }
- //}
-
- if (coreid == 0) {
- for (i = 0; i < 32; i++) {
- B_i = B_trans+i*32;
- for (z = 0; z < 32; z++) {
- *(B_i+z) = B[i+z*32];
- }
- for (j = 0; j < 16; j+=2) {
- A_j = A+j*lda;
- acc_temp0 = 0;
- for (k = 0; k < 32; k+=8) {
- A_j_k = A_j+k;
- B_i_k = B_i+k;
- acc_temp0 += *(A_j_k) * *(B_i_k);
- acc_temp0 += *(A_j_k + 1) * *(B_i_k + 1);
- acc_temp0 += *(A_j_k + 2) * *(B_i_k + 2);
- acc_temp0 += *(A_j_k + 3) * *(B_i_k + 3);
- acc_temp0 += *(A_j_k + 4) * *(B_i_k + 4);
- acc_temp0 += *(A_j_k + 5) * *(B_i_k + 5);
- acc_temp0 += *(A_j_k + 6) * *(B_i_k + 6);
- acc_temp0 += *(A_j_k + 7) * *(B_i_k + 7);
- }
- A_j += 32;
-
- acc_temp1 = 0;
- for (k = 0; k < 32; k+=8) {
- acc_temp1 += *(A_j+k) * *(B_i+k);
- acc_temp1 += *(A_j+k + 1) * *(B_i+k + 1);
- acc_temp1 += *(A_j+k + 2) * *(B_i+k + 2);
- acc_temp1 += *(A_j+k + 3) * *(B_i+k + 3);
- acc_temp1 += *(A_j+k + 4) * *(B_i+k + 4);
- acc_temp1 += *(A_j+k + 5) * *(B_i+k + 5);
- acc_temp1 += *(A_j+k + 6) * *(B_i+k + 6);
- acc_temp1 += *(A_j+k + 7) * *(B_i+k + 7);
- }
-
- C[i + j*lda] = acc_temp0;
- C[i + (j+1)*lda] = acc_temp1;
- }
- }
- } else if (coreid == 1) {
- for (i = 0; i < 32; i++) {
- B_i = B_trans+i*32;
- for (z = 0; z < 32; z++) {
- *(B_i+z) = B[i+z*32];
- }
- for (j = 16; j < 32; j+=2) {
- A_j = A+j*lda;
- acc_temp0 = 0;
- for (k = 0; k < 32; k+=8) {
- acc_temp0 += *(A_j+k) * *(B_i+k);
- acc_temp0 += *(A_j+k + 1) * *(B_i+k + 1);
- acc_temp0 += *(A_j+k + 2) * *(B_i+k + 2);
- acc_temp0 += *(A_j+k + 3) * *(B_i+k + 3);
- acc_temp0 += *(A_j+k + 4) * *(B_i+k + 4);
- acc_temp0 += *(A_j+k + 5) * *(B_i+k + 5);
- acc_temp0 += *(A_j+k + 6) * *(B_i+k + 6);
- acc_temp0 += *(A_j+k + 7) * *(B_i+k + 7);
- }
- A_j += 32;
-
- acc_temp1 = 0;
- for (k = 0; k < 32; k+=8) {
- acc_temp1 += *(A_j+k) * *(B_i+k);
- acc_temp1 += *(A_j+k + 1) * *(B_i+k + 1);
- acc_temp1 += *(A_j+k + 2) * *(B_i+k + 2);
- acc_temp1 += *(A_j+k + 3) * *(B_i+k + 3);
- acc_temp1 += *(A_j+k + 4) * *(B_i+k + 4);
- acc_temp1 += *(A_j+k + 5) * *(B_i+k + 5);
- acc_temp1 += *(A_j+k + 6) * *(B_i+k + 6);
- acc_temp1 += *(A_j+k + 7) * *(B_i+k + 7);
- }
- C[i + j*lda] = acc_temp0;
- C[i + (j+1)*lda] = acc_temp1;
- }
- }
- }
-}
-
-void __attribute__((noinline)) matmul_MI(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
- data_t acc_temp;
- data_t *A_j, *B_i;
- int j_start = coreid*16;
- int j_end = (coreid*16)+16;
- if (coreid == 0) {
- for ( i = 0; i < 32; i++ ) {
- B_i = B + i;
- for ( j = j_start; j < j_end; j++ )
- {
- acc_temp = 0;
- A_j = A + j*32;
- for ( k = 0; k < 32; k++ )
- {
- acc_temp += *(A_j + k) * *(B_i + k*32);
- }
- C[i + j*32] = acc_temp;
- }
- }
- } else if (coreid == 1) {
- for ( i = 16; i < 32; i++ ) {
- B_i = B + i;
- for ( j = j_start; j < j_end; j++ )
- {
- acc_temp = 0;
- A_j = A + j*32;
- for ( k = 0; k < 32; k+=4 )
- {
- acc_temp += *(A_j + k) * *(B_i + k*32);
- acc_temp += *(A_j + k + 1) * *(B_i + (k+1)*32);
- acc_temp += *(A_j + k + 2) * *(B_i + (k+2)*32);
- acc_temp += *(A_j + k + 3) * *(B_i + (k+3)*32);
- }
- C[i + j*32] = acc_temp;
- }
- }
- for ( i = 0; i < 16; i++ ) {
- B_i = B + i;
- for ( j = j_start; j < j_end; j++ )
- {
- acc_temp = 0;
- A_j = A + j*32;
- for ( k = 0; k < 32; k+=4 )
- {
- acc_temp += *(A_j + k) * *(B_i + k*32);
- acc_temp += *(A_j + k + 1) * *(B_i + (k+1)*32);
- acc_temp += *(A_j + k + 2) * *(B_i + (k+2)*32);
- acc_temp += *(A_j + k + 3) * *(B_i + (k+3)*32);
- }
- C[i + j*32] = acc_temp;
- }
- }
-
- }
-}
-
-void __attribute__((noinline)) matmul_MSI(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
- data_t acc_temp;
- data_t *A_j, *B_i;
- int j_start = coreid*16;
- int j_end = (coreid*16)+16;
- for ( i = 0; i < 32; i++ ) {
- B_i = B + i;
- for ( j = j_start; j < j_end; j++ )
- {
- acc_temp = 0;
- A_j = A + j*32;
- for ( k = 0; k < 32; k++ )
- {
- acc_temp += *(A_j + k) * *(B_i + k*32);
- }
- C[i + j*32] = acc_temp;
- }
- }
-}
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
- // ENABLE_SHARING = false is MI
- // ENABLE_SHARING = true is MSI
- matmul_MI_transpose(lda, A, B, C);
- //matmul_MSI(lda, A, B, C);
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// //verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- size_t i;
-
- if (coreid == 0) {
- for (i = 0; i < n/2; i++)
- {
- x[i] = x[i] + y[i];
- }
- } else if (coreid == 1) {
- for (i = n/2; i < n; i++)
- {
- x[i] = x[i] + y[i];
- }
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+
+ // Blocked multi-core matrix multiply: accumulates A * B into C for row-major lda x lda matrices.
+ // Core `coreid` of `ncores` handles columns [coreid*(lda/ncores), (coreid+1)*(lda/ncores)) of C, for every row j.
+ // B is read through tB, a per-thread transposed copy, so the inner k loop reads both A and tB at consecutive indices.
+ // The loops have no edge handling: they assume lda is a multiple of 32 (jBLOCK/kBLOCK) and of ncores, each core's column count is a multiple of 16 (iBLOCK), and lda*lda <= 4096 (tB size).
+ // NOTE(review): tB and the tmpC* accumulators are int, so data_t values are converted to int on load (exact only for integer-valued data - confirm against dataset.h); tmpC04-07/tmpC14-17 are referenced only by the commented-out 8-wide variant.
+ int j2, i2, k2, j, i, k;
+ int tmpC00, tmpC01, tmpC02, tmpC03, tmpC04, tmpC05, tmpC06, tmpC07;
+ int tmpC10, tmpC11, tmpC12, tmpC13, tmpC14, tmpC15, tmpC16, tmpC17;
+ int jBLOCK = 32;
+ int iBLOCK = 16;
+ int kBLOCK = 32;
+ static __thread int tB[4096]; // per-thread transposed copy of B; this core fills only rows [startInd, endInd)
+ int startInd = coreid*(lda/ncores);
+ int endInd = (coreid+1)*(lda/ncores);
+
+ // Transpose this core's columns of B into tB, one 2x2 group of elements per step; barrier(ncores) is called once per outer (i) iteration.
+ for (i = 0; i < lda; i += 2) {
+ for (j = startInd; j < endInd; j += 2) {
+ tB[j*lda + i] = B[i*lda + j];
+ tB[(j + 1)*lda + i] = B[i*lda + j + 1];
+ tB[j*lda + i + 1] = B[(i + 1)*lda + j];
+ tB[(j + 1)*lda + i + 1] = B[(i + 1)*lda + j + 1];
+ }
+ barrier(ncores);
+ }
+
+ // Compute C[j*lda + i] += A[j*lda + k] * tB[i*lda + k] on a 2 (j) x 4 (i) tile of local accumulators with k unrolled by 4; barrier(ncores) is called after each tile is stored back to C.
+ for ( j2 = 0; j2 < lda; j2 += jBLOCK )
+ for ( i2 = startInd; i2 < endInd; i2 += iBLOCK )
+ for ( j = j2; j < j2 + jBLOCK; j += 2 )
+ for ( k2 = 0; k2 < lda; k2 += kBLOCK )
+ for ( i = i2; i < i2 + iBLOCK; i += 4) {
+ tmpC00 = C[j*lda + i + 0]; tmpC10 = C[(j + 1)*lda + i + 0];
+ tmpC01 = C[j*lda + i + 1]; tmpC11 = C[(j + 1)*lda + i + 1];
+ tmpC02 = C[j*lda + i + 2]; tmpC12 = C[(j + 1)*lda + i + 2];
+ tmpC03 = C[j*lda + i + 3]; tmpC13 = C[(j + 1)*lda + i + 3];
+ //tmpC04 = C[j*lda + i + 4]; tmpC14 = C[(j + 1)*lda + i + 4];
+ //tmpC05 = C[j*lda + i + 5]; tmpC15 = C[(j + 1)*lda + i + 5];
+ //tmpC06 = C[j*lda + i + 6]; tmpC16 = C[(j + 1)*lda + i + 6];
+ //tmpC07 = C[j*lda + i + 7]; tmpC17 = C[(j + 1)*lda + i + 7];
+ for ( k = k2; k < k2 + kBLOCK; k += 4) {
+ tmpC00 += A[j*lda + k] * tB[(i + 0)*lda + k];
+ tmpC01 += A[j*lda + k] * tB[(i + 1)*lda + k];
+ tmpC02 += A[j*lda + k] * tB[(i + 2)*lda + k];
+ tmpC03 += A[j*lda + k] * tB[(i + 3)*lda + k];
+ //tmpC04 += A[j*lda + k] * tB[(i + 4)*lda + k];
+ //tmpC05 += A[j*lda + k] * tB[(i + 5)*lda + k];
+ //tmpC06 += A[j*lda + k] * tB[(i + 6)*lda + k];
+ //tmpC07 += A[j*lda + k] * tB[(i + 7)*lda + k];
+ tmpC10 += A[(j + 1)*lda + k] * tB[(i + 0)*lda + k];
+ tmpC11 += A[(j + 1)*lda + k] * tB[(i + 1)*lda + k];
+ tmpC12 += A[(j + 1)*lda + k] * tB[(i + 2)*lda + k];
+ tmpC13 += A[(j + 1)*lda + k] * tB[(i + 3)*lda + k];
+ //tmpC14 += A[(j + 1)*lda + k] * tB[(i + 4)*lda + k];
+ //tmpC15 += A[(j + 1)*lda + k] * tB[(i + 5)*lda + k];
+ //tmpC16 += A[(j + 1)*lda + k] * tB[(i + 6)*lda + k];
+ //tmpC17 += A[(j + 1)*lda + k] * tB[(i + 7)*lda + k];
+
+ tmpC00 += A[j*lda + k + 1] * tB[(i + 0)*lda + k + 1];
+ tmpC01 += A[j*lda + k + 1] * tB[(i + 1)*lda + k + 1];
+ tmpC02 += A[j*lda + k + 1] * tB[(i + 2)*lda + k + 1];
+ tmpC03 += A[j*lda + k + 1] * tB[(i + 3)*lda + k + 1];
+ //tmpC04 += A[j*lda + k + 1] * tB[(i + 4)*lda + k + 1];
+ //tmpC05 += A[j*lda + k + 1] * tB[(i + 5)*lda + k + 1];
+ //tmpC06 += A[j*lda + k + 1] * tB[(i + 6)*lda + k + 1];
+ //tmpC07 += A[j*lda + k + 1] * tB[(i + 7)*lda + k + 1];
+ tmpC10 += A[(j + 1)*lda + k + 1] * tB[(i + 0)*lda + k + 1];
+ tmpC11 += A[(j + 1)*lda + k + 1] * tB[(i + 1)*lda + k + 1];
+ tmpC12 += A[(j + 1)*lda + k + 1] * tB[(i + 2)*lda + k + 1];
+ tmpC13 += A[(j + 1)*lda + k + 1] * tB[(i + 3)*lda + k + 1];
+ //tmpC14 += A[(j + 1)*lda + k + 1] * tB[(i + 4)*lda + k + 1];
+ //tmpC15 += A[(j + 1)*lda + k + 1] * tB[(i + 5)*lda + k + 1];
+ //tmpC16 += A[(j + 1)*lda + k + 1] * tB[(i + 6)*lda + k + 1];
+ //tmpC17 += A[(j + 1)*lda + k + 1] * tB[(i + 7)*lda + k + 1];
+
+ tmpC00 += A[j*lda + k + 2] * tB[(i + 0)*lda + k + 2];
+ tmpC01 += A[j*lda + k + 2] * tB[(i + 1)*lda + k + 2];
+ tmpC02 += A[j*lda + k + 2] * tB[(i + 2)*lda + k + 2];
+ tmpC03 += A[j*lda + k + 2] * tB[(i + 3)*lda + k + 2];
+ //tmpC04 += A[j*lda + k + 2] * tB[(i + 4)*lda + k + 2];
+ //tmpC05 += A[j*lda + k + 2] * tB[(i + 5)*lda + k + 2];
+ //tmpC06 += A[j*lda + k + 2] * tB[(i + 6)*lda + k + 2];
+ //tmpC07 += A[j*lda + k + 2] * tB[(i + 7)*lda + k + 2];
+ tmpC10 += A[(j + 1)*lda + k + 2] * tB[(i + 0)*lda + k + 2];
+ tmpC11 += A[(j + 1)*lda + k + 2] * tB[(i + 1)*lda + k + 2];
+ tmpC12 += A[(j + 1)*lda + k + 2] * tB[(i + 2)*lda + k + 2];
+ tmpC13 += A[(j + 1)*lda + k + 2] * tB[(i + 3)*lda + k + 2];
+ //tmpC14 += A[(j + 1)*lda + k + 2] * tB[(i + 4)*lda + k + 2];
+ //tmpC15 += A[(j + 1)*lda + k + 2] * tB[(i + 5)*lda + k + 2];
+ //tmpC16 += A[(j + 1)*lda + k + 2] * tB[(i + 6)*lda + k + 2];
+ //tmpC17 += A[(j + 1)*lda + k + 2] * tB[(i + 7)*lda + k + 2];
+
+ tmpC00 += A[j*lda + k + 3] * tB[(i + 0)*lda + k + 3];
+ tmpC01 += A[j*lda + k + 3] * tB[(i + 1)*lda + k + 3];
+ tmpC02 += A[j*lda + k + 3] * tB[(i + 2)*lda + k + 3];
+ tmpC03 += A[j*lda + k + 3] * tB[(i + 3)*lda + k + 3];
+ //tmpC04 += A[j*lda + k + 3] * tB[(i + 4)*lda + k + 3];
+ //tmpC05 += A[j*lda + k + 3] * tB[(i + 5)*lda + k + 3];
+ //tmpC06 += A[j*lda + k + 3] * tB[(i + 6)*lda + k + 3];
+ //tmpC07 += A[j*lda + k + 3] * tB[(i + 7)*lda + k + 3];
+ tmpC10 += A[(j + 1)*lda + k + 3] * tB[(i + 0)*lda + k + 3];
+ tmpC11 += A[(j + 1)*lda + k + 3] * tB[(i + 1)*lda + k + 3];
+ tmpC12 += A[(j + 1)*lda + k + 3] * tB[(i + 2)*lda + k + 3];
+ tmpC13 += A[(j + 1)*lda + k + 3] * tB[(i + 3)*lda + k + 3];
+ //tmpC14 += A[(j + 1)*lda + k + 3] * tB[(i + 4)*lda + k + 3];
+ //tmpC15 += A[(j + 1)*lda + k + 3] * tB[(i + 5)*lda + k + 3];
+ //tmpC16 += A[(j + 1)*lda + k + 3] * tB[(i + 6)*lda + k + 3];
+ //tmpC17 += A[(j + 1)*lda + k + 3] * tB[(i + 7)*lda + k + 3];
+ }
+ C[j*lda + i + 0] = tmpC00; C[(j + 1)*lda + i + 0] = tmpC10;
+ C[j*lda + i + 1] = tmpC01; C[(j + 1)*lda + i + 1] = tmpC11;
+ C[j*lda + i + 2] = tmpC02; C[(j + 1)*lda + i + 2] = tmpC12;
+ C[j*lda + i + 3] = tmpC03; C[(j + 1)*lda + i + 3] = tmpC13;
+ //C[j*lda + i + 4] = tmpC04; C[(j + 1)*lda + i + 4] = tmpC14;
+ //C[j*lda + i + 5] = tmpC05; C[(j + 1)*lda + i + 5] = tmpC15;
+ //C[j*lda + i + 6] = tmpC06; C[(j + 1)*lda + i + 6] = tmpC16;
+ //C[j*lda + i + 7] = tmpC07; C[(j + 1)*lda + i + 7] = tmpC17;
+ barrier(ncores);
+ }
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student: Benjamin Han
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
- int j2, i2, k2, j, i, k;
- int tmpC00, tmpC01, tmpC02, tmpC03, tmpC04, tmpC05, tmpC06, tmpC07;
- int tmpC10, tmpC11, tmpC12, tmpC13, tmpC14, tmpC15, tmpC16, tmpC17;
- int jBLOCK = 32;
- int iBLOCK = 16;
- int kBLOCK = 32;
- static __thread int tB[4096]; //__thread
- int startInd = 0;
- int endInd = lda >> 1;
- if (coreid == 1) {
- startInd = lda >> 1;
- endInd = lda;
- }
-
- //tranpose B (block?)
- for (i = 0; i < lda; i += 2) {
- for (j = startInd; j < endInd; j += 2) {
- tB[j*lda + i] = B[i*lda + j];
- tB[(j + 1)*lda + i] = B[i*lda + j + 1];
- tB[j*lda + i + 1] = B[(i + 1)*lda + j];
- tB[(j + 1)*lda + i + 1] = B[(i + 1)*lda + j + 1];
- }
- }
- barrier(ncores);
-
- // compute C[j*n + i] += A[j*n + k] + Btranspose[i*n + k]
- for ( j2 = 0; j2 < lda; j2 += jBLOCK )
- for ( i2 = startInd; i2 < endInd; i2 += iBLOCK )
- for ( j = j2; j < j2 + jBLOCK; j += 2 )
- for ( k2 = 0; k2 < lda; k2 += kBLOCK )
- for ( i = i2; i < i2 + iBLOCK; i += 4) {
- tmpC00 = C[j*lda + i + 0]; tmpC10 = C[(j + 1)*lda + i + 0];
- tmpC01 = C[j*lda + i + 1]; tmpC11 = C[(j + 1)*lda + i + 1];
- tmpC02 = C[j*lda + i + 2]; tmpC12 = C[(j + 1)*lda + i + 2];
- tmpC03 = C[j*lda + i + 3]; tmpC13 = C[(j + 1)*lda + i + 3];
- //tmpC04 = C[j*lda + i + 4]; tmpC14 = C[(j + 1)*lda + i + 4];
- //tmpC05 = C[j*lda + i + 5]; tmpC15 = C[(j + 1)*lda + i + 5];
- //tmpC06 = C[j*lda + i + 6]; tmpC16 = C[(j + 1)*lda + i + 6];
- //tmpC07 = C[j*lda + i + 7]; tmpC17 = C[(j + 1)*lda + i + 7];
- for ( k = k2; k < k2 + kBLOCK; k += 4) {
- tmpC00 += A[j*lda + k] * tB[(i + 0)*lda + k];
- tmpC01 += A[j*lda + k] * tB[(i + 1)*lda + k];
- tmpC02 += A[j*lda + k] * tB[(i + 2)*lda + k];
- tmpC03 += A[j*lda + k] * tB[(i + 3)*lda + k];
- //tmpC04 += A[j*lda + k] * tB[(i + 4)*lda + k];
- //tmpC05 += A[j*lda + k] * tB[(i + 5)*lda + k];
- //tmpC06 += A[j*lda + k] * tB[(i + 6)*lda + k];
- //tmpC07 += A[j*lda + k] * tB[(i + 7)*lda + k];
- tmpC10 += A[(j + 1)*lda + k] * tB[(i + 0)*lda + k];
- tmpC11 += A[(j + 1)*lda + k] * tB[(i + 1)*lda + k];
- tmpC12 += A[(j + 1)*lda + k] * tB[(i + 2)*lda + k];
- tmpC13 += A[(j + 1)*lda + k] * tB[(i + 3)*lda + k];
- //tmpC14 += A[(j + 1)*lda + k] * tB[(i + 4)*lda + k];
- //tmpC15 += A[(j + 1)*lda + k] * tB[(i + 5)*lda + k];
- //tmpC16 += A[(j + 1)*lda + k] * tB[(i + 6)*lda + k];
- //tmpC17 += A[(j + 1)*lda + k] * tB[(i + 7)*lda + k];
-
- tmpC00 += A[j*lda + k + 1] * tB[(i + 0)*lda + k + 1];
- tmpC01 += A[j*lda + k + 1] * tB[(i + 1)*lda + k + 1];
- tmpC02 += A[j*lda + k + 1] * tB[(i + 2)*lda + k + 1];
- tmpC03 += A[j*lda + k + 1] * tB[(i + 3)*lda + k + 1];
- //tmpC04 += A[j*lda + k + 1] * tB[(i + 4)*lda + k + 1];
- //tmpC05 += A[j*lda + k + 1] * tB[(i + 5)*lda + k + 1];
- //tmpC06 += A[j*lda + k + 1] * tB[(i + 6)*lda + k + 1];
- //tmpC07 += A[j*lda + k + 1] * tB[(i + 7)*lda + k + 1];
- tmpC10 += A[(j + 1)*lda + k + 1] * tB[(i + 0)*lda + k + 1];
- tmpC11 += A[(j + 1)*lda + k + 1] * tB[(i + 1)*lda + k + 1];
- tmpC12 += A[(j + 1)*lda + k + 1] * tB[(i + 2)*lda + k + 1];
- tmpC13 += A[(j + 1)*lda + k + 1] * tB[(i + 3)*lda + k + 1];
- //tmpC14 += A[(j + 1)*lda + k + 1] * tB[(i + 4)*lda + k + 1];
- //tmpC15 += A[(j + 1)*lda + k + 1] * tB[(i + 5)*lda + k + 1];
- //tmpC16 += A[(j + 1)*lda + k + 1] * tB[(i + 6)*lda + k + 1];
- //tmpC17 += A[(j + 1)*lda + k + 1] * tB[(i + 7)*lda + k + 1];
-
- tmpC00 += A[j*lda + k + 2] * tB[(i + 0)*lda + k + 2];
- tmpC01 += A[j*lda + k + 2] * tB[(i + 1)*lda + k + 2];
- tmpC02 += A[j*lda + k + 2] * tB[(i + 2)*lda + k + 2];
- tmpC03 += A[j*lda + k + 2] * tB[(i + 3)*lda + k + 2];
- //tmpC04 += A[j*lda + k + 2] * tB[(i + 4)*lda + k + 2];
- //tmpC05 += A[j*lda + k + 2] * tB[(i + 5)*lda + k + 2];
- //tmpC06 += A[j*lda + k + 2] * tB[(i + 6)*lda + k + 2];
- //tmpC07 += A[j*lda + k + 2] * tB[(i + 7)*lda + k + 2];
- tmpC10 += A[(j + 1)*lda + k + 2] * tB[(i + 0)*lda + k + 2];
- tmpC11 += A[(j + 1)*lda + k + 2] * tB[(i + 1)*lda + k + 2];
- tmpC12 += A[(j + 1)*lda + k + 2] * tB[(i + 2)*lda + k + 2];
- tmpC13 += A[(j + 1)*lda + k + 2] * tB[(i + 3)*lda + k + 2];
- //tmpC14 += A[(j + 1)*lda + k + 2] * tB[(i + 4)*lda + k + 2];
- //tmpC15 += A[(j + 1)*lda + k + 2] * tB[(i + 5)*lda + k + 2];
- //tmpC16 += A[(j + 1)*lda + k + 2] * tB[(i + 6)*lda + k + 2];
- //tmpC17 += A[(j + 1)*lda + k + 2] * tB[(i + 7)*lda + k + 2];
-
- tmpC00 += A[j*lda + k + 3] * tB[(i + 0)*lda + k + 3];
- tmpC01 += A[j*lda + k + 3] * tB[(i + 1)*lda + k + 3];
- tmpC02 += A[j*lda + k + 3] * tB[(i + 2)*lda + k + 3];
- tmpC03 += A[j*lda + k + 3] * tB[(i + 3)*lda + k + 3];
- //tmpC04 += A[j*lda + k + 3] * tB[(i + 4)*lda + k + 3];
- //tmpC05 += A[j*lda + k + 3] * tB[(i + 5)*lda + k + 3];
- //tmpC06 += A[j*lda + k + 3] * tB[(i + 6)*lda + k + 3];
- //tmpC07 += A[j*lda + k + 3] * tB[(i + 7)*lda + k + 3];
- tmpC10 += A[(j + 1)*lda + k + 3] * tB[(i + 0)*lda + k + 3];
- tmpC11 += A[(j + 1)*lda + k + 3] * tB[(i + 1)*lda + k + 3];
- tmpC12 += A[(j + 1)*lda + k + 3] * tB[(i + 2)*lda + k + 3];
- tmpC13 += A[(j + 1)*lda + k + 3] * tB[(i + 3)*lda + k + 3];
- //tmpC14 += A[(j + 1)*lda + k + 3] * tB[(i + 4)*lda + k + 3];
- //tmpC15 += A[(j + 1)*lda + k + 3] * tB[(i + 5)*lda + k + 3];
- //tmpC16 += A[(j + 1)*lda + k + 3] * tB[(i + 6)*lda + k + 3];
- //tmpC17 += A[(j + 1)*lda + k + 3] * tB[(i + 7)*lda + k + 3];
- }
- C[j*lda + i + 0] = tmpC00; C[(j + 1)*lda + i + 0] = tmpC10;
- C[j*lda + i + 1] = tmpC01; C[(j + 1)*lda + i + 1] = tmpC11;
- C[j*lda + i + 2] = tmpC02; C[(j + 1)*lda + i + 2] = tmpC12;
- C[j*lda + i + 3] = tmpC03; C[(j + 1)*lda + i + 3] = tmpC13;
- //C[j*lda + i + 4] = tmpC04; C[(j + 1)*lda + i + 4] = tmpC14;
- //C[j*lda + i + 5] = tmpC05; C[(j + 1)*lda + i + 5] = tmpC15;
- //C[j*lda + i + 6] = tmpC06; C[(j + 1)*lda + i + 6] = tmpC16;
- //C[j*lda + i + 7] = tmpC07; C[(j + 1)*lda + i + 7] = tmpC17;
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student: Benjamin Han
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
- int j2, i2, k2, j, i, k;
- int tmpC00, tmpC01, tmpC02, tmpC03, tmpC04, tmpC05, tmpC06, tmpC07;
- int tmpC10, tmpC11, tmpC12, tmpC13, tmpC14, tmpC15, tmpC16, tmpC17;
- int jBLOCK = 32;
- int iBLOCK = 16;
- int kBLOCK = 32;
- static __thread int tB[4096]; //__thread
- int startInd = 0;
- int endInd = lda >> 1;
- if (coreid == 1) {
- startInd = lda >> 1;
- endInd = lda;
- }
-
- //tranpose B (block?)
- for (i = 0; i < lda; i += 2) {
- for (j = startInd; j < endInd; j += 2) {
- tB[j*lda + i] = B[i*lda + j];
- tB[(j + 1)*lda + i] = B[i*lda + j + 1];
- tB[j*lda + i + 1] = B[(i + 1)*lda + j];
- tB[(j + 1)*lda + i + 1] = B[(i + 1)*lda + j + 1];
- }
- }
- barrier(nc);
-
- // compute C[j*n + i] += A[j*n + k] + Btranspose[i*n + k]
- for ( j2 = 0; j2 < lda; j2 += jBLOCK )
- for ( i2 = startInd; i2 < endInd; i2 += iBLOCK )
- for ( j = j2; j < j2 + jBLOCK; j += 2 )
- for ( k2 = 0; k2 < lda; k2 += kBLOCK )
- for ( i = i2; i < i2 + iBLOCK; i += 4) {
- tmpC00 = C[j*lda + i + 0]; tmpC10 = C[(j + 1)*lda + i + 0];
- tmpC01 = C[j*lda + i + 1]; tmpC11 = C[(j + 1)*lda + i + 1];
- tmpC02 = C[j*lda + i + 2]; tmpC12 = C[(j + 1)*lda + i + 2];
- tmpC03 = C[j*lda + i + 3]; tmpC13 = C[(j + 1)*lda + i + 3];
- //tmpC04 = C[j*lda + i + 4]; tmpC14 = C[(j + 1)*lda + i + 4];
- //tmpC05 = C[j*lda + i + 5]; tmpC15 = C[(j + 1)*lda + i + 5];
- //tmpC06 = C[j*lda + i + 6]; tmpC16 = C[(j + 1)*lda + i + 6];
- //tmpC07 = C[j*lda + i + 7]; tmpC17 = C[(j + 1)*lda + i + 7];
- for ( k = k2; k < k2 + kBLOCK; k += 4) {
- tmpC00 += A[j*lda + k] * tB[(i + 0)*lda + k];
- tmpC01 += A[j*lda + k] * tB[(i + 1)*lda + k];
- tmpC02 += A[j*lda + k] * tB[(i + 2)*lda + k];
- tmpC03 += A[j*lda + k] * tB[(i + 3)*lda + k];
- //tmpC04 += A[j*lda + k] * tB[(i + 4)*lda + k];
- //tmpC05 += A[j*lda + k] * tB[(i + 5)*lda + k];
- //tmpC06 += A[j*lda + k] * tB[(i + 6)*lda + k];
- //tmpC07 += A[j*lda + k] * tB[(i + 7)*lda + k];
- tmpC10 += A[(j + 1)*lda + k] * tB[(i + 0)*lda + k];
- tmpC11 += A[(j + 1)*lda + k] * tB[(i + 1)*lda + k];
- tmpC12 += A[(j + 1)*lda + k] * tB[(i + 2)*lda + k];
- tmpC13 += A[(j + 1)*lda + k] * tB[(i + 3)*lda + k];
- //tmpC14 += A[(j + 1)*lda + k] * tB[(i + 4)*lda + k];
- //tmpC15 += A[(j + 1)*lda + k] * tB[(i + 5)*lda + k];
- //tmpC16 += A[(j + 1)*lda + k] * tB[(i + 6)*lda + k];
- //tmpC17 += A[(j + 1)*lda + k] * tB[(i + 7)*lda + k];
-
- tmpC00 += A[j*lda + k + 1] * tB[(i + 0)*lda + k + 1];
- tmpC01 += A[j*lda + k + 1] * tB[(i + 1)*lda + k + 1];
- tmpC02 += A[j*lda + k + 1] * tB[(i + 2)*lda + k + 1];
- tmpC03 += A[j*lda + k + 1] * tB[(i + 3)*lda + k + 1];
- //tmpC04 += A[j*lda + k + 1] * tB[(i + 4)*lda + k + 1];
- //tmpC05 += A[j*lda + k + 1] * tB[(i + 5)*lda + k + 1];
- //tmpC06 += A[j*lda + k + 1] * tB[(i + 6)*lda + k + 1];
- //tmpC07 += A[j*lda + k + 1] * tB[(i + 7)*lda + k + 1];
- tmpC10 += A[(j + 1)*lda + k + 1] * tB[(i + 0)*lda + k + 1];
- tmpC11 += A[(j + 1)*lda + k + 1] * tB[(i + 1)*lda + k + 1];
- tmpC12 += A[(j + 1)*lda + k + 1] * tB[(i + 2)*lda + k + 1];
- tmpC13 += A[(j + 1)*lda + k + 1] * tB[(i + 3)*lda + k + 1];
- //tmpC14 += A[(j + 1)*lda + k + 1] * tB[(i + 4)*lda + k + 1];
- //tmpC15 += A[(j + 1)*lda + k + 1] * tB[(i + 5)*lda + k + 1];
- //tmpC16 += A[(j + 1)*lda + k + 1] * tB[(i + 6)*lda + k + 1];
- //tmpC17 += A[(j + 1)*lda + k + 1] * tB[(i + 7)*lda + k + 1];
-
- tmpC00 += A[j*lda + k + 2] * tB[(i + 0)*lda + k + 2];
- tmpC01 += A[j*lda + k + 2] * tB[(i + 1)*lda + k + 2];
- tmpC02 += A[j*lda + k + 2] * tB[(i + 2)*lda + k + 2];
- tmpC03 += A[j*lda + k + 2] * tB[(i + 3)*lda + k + 2];
- //tmpC04 += A[j*lda + k + 2] * tB[(i + 4)*lda + k + 2];
- //tmpC05 += A[j*lda + k + 2] * tB[(i + 5)*lda + k + 2];
- //tmpC06 += A[j*lda + k + 2] * tB[(i + 6)*lda + k + 2];
- //tmpC07 += A[j*lda + k + 2] * tB[(i + 7)*lda + k + 2];
- tmpC10 += A[(j + 1)*lda + k + 2] * tB[(i + 0)*lda + k + 2];
- tmpC11 += A[(j + 1)*lda + k + 2] * tB[(i + 1)*lda + k + 2];
- tmpC12 += A[(j + 1)*lda + k + 2] * tB[(i + 2)*lda + k + 2];
- tmpC13 += A[(j + 1)*lda + k + 2] * tB[(i + 3)*lda + k + 2];
- //tmpC14 += A[(j + 1)*lda + k + 2] * tB[(i + 4)*lda + k + 2];
- //tmpC15 += A[(j + 1)*lda + k + 2] * tB[(i + 5)*lda + k + 2];
- //tmpC16 += A[(j + 1)*lda + k + 2] * tB[(i + 6)*lda + k + 2];
- //tmpC17 += A[(j + 1)*lda + k + 2] * tB[(i + 7)*lda + k + 2];
-
- tmpC00 += A[j*lda + k + 3] * tB[(i + 0)*lda + k + 3];
- tmpC01 += A[j*lda + k + 3] * tB[(i + 1)*lda + k + 3];
- tmpC02 += A[j*lda + k + 3] * tB[(i + 2)*lda + k + 3];
- tmpC03 += A[j*lda + k + 3] * tB[(i + 3)*lda + k + 3];
- //tmpC04 += A[j*lda + k + 3] * tB[(i + 4)*lda + k + 3];
- //tmpC05 += A[j*lda + k + 3] * tB[(i + 5)*lda + k + 3];
- //tmpC06 += A[j*lda + k + 3] * tB[(i + 6)*lda + k + 3];
- //tmpC07 += A[j*lda + k + 3] * tB[(i + 7)*lda + k + 3];
- tmpC10 += A[(j + 1)*lda + k + 3] * tB[(i + 0)*lda + k + 3];
- tmpC11 += A[(j + 1)*lda + k + 3] * tB[(i + 1)*lda + k + 3];
- tmpC12 += A[(j + 1)*lda + k + 3] * tB[(i + 2)*lda + k + 3];
- tmpC13 += A[(j + 1)*lda + k + 3] * tB[(i + 3)*lda + k + 3];
- //tmpC14 += A[(j + 1)*lda + k + 3] * tB[(i + 4)*lda + k + 3];
- //tmpC15 += A[(j + 1)*lda + k + 3] * tB[(i + 5)*lda + k + 3];
- //tmpC16 += A[(j + 1)*lda + k + 3] * tB[(i + 6)*lda + k + 3];
- //tmpC17 += A[(j + 1)*lda + k + 3] * tB[(i + 7)*lda + k + 3];
- }
- C[j*lda + i + 0] = tmpC00; C[(j + 1)*lda + i + 0] = tmpC10;
- C[j*lda + i + 1] = tmpC01; C[(j + 1)*lda + i + 1] = tmpC11;
- C[j*lda + i + 2] = tmpC02; C[(j + 1)*lda + i + 2] = tmpC12;
- C[j*lda + i + 3] = tmpC03; C[(j + 1)*lda + i + 3] = tmpC13;
- //C[j*lda + i + 4] = tmpC04; C[(j + 1)*lda + i + 4] = tmpC14;
- //C[j*lda + i + 5] = tmpC05; C[(j + 1)*lda + i + 5] = tmpC15;
- //C[j*lda + i + 6] = tmpC06; C[(j + 1)*lda + i + 6] = tmpC16;
- //C[j*lda + i + 7] = tmpC07; C[(j + 1)*lda + i + 7] = tmpC17;
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student : Benjamin Han
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- int startInd = 0;
- int endInd = n >> 1;
- if (coreid == 1) {
- startInd = n >> 1;
- endInd = n;
- }
- for (size_t i = startInd ; i < endInd; i+=1) {
- x[i] = x[i] + y[i];
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+//--------------------------------------------------------------------------
+// Multi-core matrix multiply: C += A * B
+//
+// A, B and C are lda x lda matrices stored as flat arrays, with element
+// (row, col) at index row*lda + col. Results are accumulated into C with
+// "+=", so the caller must clear C before the first call.
+//
+// Work is split by output row: the call made with a given coreid only
+// touches rows coreid, coreid + ncores, coreid + 2*ncores, ... of A and C,
+// so calls with distinct coreid values in [0, ncores) write disjoint rows.
+// Every call reads all of B.
+//
+//   coreid - index of the calling core, expected in [0, ncores)
+//   ncores - number of cores sharing the work, must be >= 1
+//   lda    - matrix dimension (rows == columns)
+//   A, B   - input matrices, lda*lda elements each
+//   C      - output matrix, lda*lda elements, accumulated in place
+//
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+  int i, j, k, u;
+
+  // Without a positive core count the row stride below would not advance,
+  // and a negative coreid would index before the start of the arrays.
+  if (ncores < 1 || coreid < 0)
+    return;
+
+  for (j = coreid; j < lda; j += 4*ncores)
+  {
+    if (j + 3*ncores < lda)
+    {
+      // Fast path: all four strided rows exist. Each element of B is
+      // loaded once and reused for four output rows.
+      const int a  = j*lda;
+      const int a1 = (j + 1*ncores)*lda;
+      const int a2 = (j + 2*ncores)*lda;
+      const int a3 = (j + 3*ncores)*lda;
+
+      for (k = 0; k < lda; k++)
+      {
+        const int b = k*lda;
+        for (i = 0; i < lda; i++)
+        {
+          // Must be data_t: an int temporary would truncate non-integral
+          // values of B before the multiply.
+          const data_t c = B[b + i];
+          C[i + a]  += A[a  + k]*c;
+          C[i + a1] += A[a1 + k]*c;
+          C[i + a2] += A[a2 + k]*c;
+          C[i + a3] += A[a3 + k]*c;
+        }
+      }
+    }
+    else
+    {
+      // Tail: fewer than four rows remain for this core (lda is not a
+      // multiple of 4*ncores). Process only the rows that lie inside the
+      // matrix instead of reading/writing past the end of A and C.
+      for (u = 0; u < 4 && j + u*ncores < lda; u++)
+      {
+        const int r = (j + u*ncores)*lda;
+        for (k = 0; k < lda; k++)
+        {
+          const int b = k*lda;
+          const data_t s = A[r + k];
+          for (i = 0; i < lda; i++)
+            C[i + r] += s*B[b + i];
+        }
+      }
+    }
+  }
+}
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student: Ryan Ricks
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i,j,k,a,b,a1,a2,a3,c;
- for (j=coreid; j<lda; j+=8){
- a=j*lda;
- a1=(j+2)*lda;
- a2=(j+4)*lda;
- a3=(j+6)*lda;
- for (k=0;k<lda; k++)
- {
- b = k*lda;
- for (i=0;i<lda;i++){
- c = B[b+i];
- C[i+a]+=A[a+k]*c;
- C[i+a1]+=A[a1+k]*c;
- C[i+a2]+=A[a2+k]*c;
- C[i+a3]+=A[a3+k]*c;
-}
-}
-}
-// ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-int i,j,k,a,b,b1,a1,a2,a3,c,c1,c2,c3,b2,b3;
- for (j=coreid*4; j<lda; j+=8){
- a=j*lda;
- a1=(j+1)*lda;
- a2=(j+2)*lda;
- a3=(j+3)*lda;
- for (k=0;k<lda; k+=2)
- {
- b = k*lda;
- b1 = (k+1)*lda;
- for (i=0;i<lda;i++){
- c = B[b+i];
- c1 = B[b1+i];
- C[i+a]+=A[a+k]*c;
- C[i+a1]+=A[a1+k]*c;
- C[i+a2]+=A[a2+k]*c;
- C[i+a3]+=A[a3+k]*c;
- C[i+a]+=A[a+k+1]*c1;
- C[i+a1]+=A[a1+k+1]*c1;
- C[i+a2]+=A[a2+k+1]*c1;
- C[i+a3]+=A[a3+k+1]*c1;
-}
-}
-}
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
-// // Execute the provided, naive matmul
-// barrier(nc);
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-//
-//
-// // verify
-// verifyMT(ARRAY_SIZE, results_data, verify_data);
-//
-// // clear results from the first trial
-// size_t i;
-// if (coreid == 0)
-// for (i=0; i < ARRAY_SIZE; i++)
-// results_data[i] = 0;
-// barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- size_t i;
- for (i = coreid*8; i<n/16*16; i+=16){
- x[i]=x[i]+y[i];
- x[i+1]=x[i+1]+y[i+1];
- x[i+2]=x[i+2]+y[i+2];
- x[i+3]=x[i+3]+y[i+3];
- x[i+4]=x[i+4]+y[i+4];
- x[i+5]=x[i+5]+y[i+5];
- x[i+6]=x[i+6]+y[i+6];
- x[i+7]=x[i+7]+y[i+7];
- }
- for (i = coreid+n/16*16; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i]; }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
- int i, j, k , jj , kk;
- int start_i = coreid*lda/2;
- int end_i = start_i + lda/2;
- int step_j, step_k;
- int start_k, end_k, start_j, end_j;
- int j_lda;
- int pos_A , pos_B, pos_C;
- data_t temp00, temp01,temp02,temp03,temp04,temp05,temp06,temp07;
- data_t temp10, temp11,temp12,temp13,temp14,temp15,temp16,temp17;
- data_t temp_A0, temp_A1, temp_A2, temp_A3, temp_A4, temp_A5, temp_A6, temp_A7;
-
- if (coreid == 0)
- {
- step_k = 1;
- start_k= 0;
- end_k = lda;
-
- step_j = 2;
- start_j= 0;
- end_j = lda;
-
- }else
- {
-
- step_k = -1;
- start_k = lda-1;
- end_k = -1;
-
- step_j = -2;
- start_j= lda-2;
- end_j = -2;
- }
-
- for( kk = start_k ; kk!= end_k ; kk+=(step_k*16) )
- {
- for( jj = start_j ; jj!= end_j ; jj+=(step_j*8) )
- {
- for ( i = start_i; i < end_i; i+=8 )
- {
- //pos_C = i + jj*lda;
- for ( j = jj; j != (jj+(step_j*8)) ; j+=step_j )
- {
-
- pos_C = i + j*lda;
- temp00 = C[(pos_C + 0)];
- temp01 = C[(pos_C + 1)];
- temp02 = C[(pos_C + 2)];
- temp03 = C[(pos_C + 3)];
- temp04 = C[(pos_C + 4)];
- temp05 = C[(pos_C + 5)];
- temp06 = C[(pos_C + 6)];
- temp07 = C[(pos_C + 7)];
-
- //pos_C += lda;
- pos_C = i + (j+1)*lda;
-
- temp10 = C[(pos_C + 0)];
- temp11 = C[(pos_C + 1)];
- temp12 = C[(pos_C + 2)];
- temp13 = C[(pos_C + 3)];
- temp14 = C[(pos_C + 4)];
- temp15 = C[(pos_C + 5)];
- temp16 = C[(pos_C + 6)];
- temp17 = C[(pos_C + 7)];
-
- pos_B = kk*lda + i;
- pos_A = j*lda + kk;
- for ( k = kk; k != (kk+(step_k*16)) ; k+=step_k )
- {
- temp_A0 = A[ pos_A ] ;
- temp_A1 = A[pos_A +lda];
-
- temp00 += temp_A0 * B[(pos_B + 0)];
- temp01 += temp_A0 * B[(pos_B + 1)];
- temp02 += temp_A0 * B[(pos_B + 2)];
- temp03 += temp_A0 * B[(pos_B + 3)];
- temp04 += temp_A0 * B[(pos_B + 4)];
- temp05 += temp_A0 * B[(pos_B + 5)];
- temp06 += temp_A0 * B[(pos_B + 6)];
- temp07 += temp_A0 * B[(pos_B + 7)];
-
- temp10 += temp_A1 * B[(pos_B + 0)];
- temp11 += temp_A1 * B[(pos_B + 1)];
- temp12 += temp_A1 * B[(pos_B + 2)];
- temp13 += temp_A1 * B[(pos_B + 3)];
- temp14 += temp_A1 * B[(pos_B + 4)];
- temp15 += temp_A1 * B[(pos_B + 5)];
- temp16 += temp_A1 * B[(pos_B + 6)];
- temp17 += temp_A1 * B[(pos_B + 7)];
-
- pos_B += (lda*step_k) ;
- pos_A += step_k;
- }
- //barrier(nc);
-
- C[(pos_C + 0)] = temp10;
- C[(pos_C + 1)] = temp11;
- C[(pos_C + 2)] = temp12;
- C[(pos_C + 3)] = temp13;
- C[(pos_C + 4)] = temp14;
- C[(pos_C + 5)] = temp15;
- C[(pos_C + 6)] = temp16;
- C[(pos_C + 7)] = temp17;
- //barrier(nc);
-
- pos_C = i + j*lda;
- //pos_C -= lda;
- C[(pos_C + 0)] = temp00;
- C[(pos_C + 1)] = temp01;
- C[(pos_C + 2)] = temp02;
- C[(pos_C + 3)] = temp03;
- C[(pos_C + 4)] = temp04;
- C[(pos_C + 5)] = temp05;
- C[(pos_C + 6)] = temp06;
- C[(pos_C + 7)] = temp07;
- //barrier(nc);
- //pos_C += step_j * lda;
- }
- //barrier(nc);
- }
- //barrier(nc);
-
- }
- //barrier(nc);
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
- /*
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-
- */
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
-
- //printf("input1_data");
-exit(0);
-
-}
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArray( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
- int i, j, k;
- int temp0, temp1,temp2,temp3,temp4,temp5,temp6,temp7;
- int start = coreid*lda/2;
- int end = start + lda/2;
- int j_lda;
- int temp_i;
- int temp_A0, temp_A1, temp_A2, temp_A3 ;
-
- for ( i = start; i < end; i+=8){
- for ( j = 0; j < lda; j++)
- {
- j_lda = j*lda;
- temp0 = C[(i+0) + j_lda];
- temp1 = C[(i+1) + j_lda];
- temp2 = C[(i+2) + j_lda];
- temp3 = C[(i+3) + j_lda];
- temp4 = C[(i+4) + j_lda];
- temp5 = C[(i+5) + j_lda];
- temp6 = C[(i+6) + j_lda];
- temp7 = C[(i+7) + j_lda];
-
-
-
- for ( k = 0; k < lda; k+=4)
- {
- temp_i = i;
- temp_A0 = A[j_lda + (k+0)] ;
- temp_A1 = A[j_lda + (k+1)] ;
- temp_A2 = A[j_lda + (k+2)] ;
- temp_A3 = A[j_lda + (k+3)] ;
-
-
- temp0 += temp_A0 * B[(k+0)*lda + temp_i];
- temp0 += temp_A1 * B[(k+1)*lda + temp_i];
- temp0 += temp_A2 * B[(k+2)*lda + temp_i];
- temp0 += temp_A3 * B[(k+3)*lda + temp_i];
- temp_i++;
-
- temp1 += temp_A0 * B[(k+0)*lda + temp_i];
- temp1 += temp_A1 * B[(k+1)*lda + temp_i];
- temp1 += temp_A2 * B[(k+2)*lda + temp_i];
- temp1 += temp_A3 * B[(k+3)*lda + temp_i];
- temp_i++;
-
- temp2 += temp_A0 * B[(k+0)*lda + temp_i];
- temp2 += temp_A1 * B[(k+1)*lda + temp_i];
- temp2 += temp_A2 * B[(k+2)*lda + temp_i];
- temp2 += temp_A3 * B[(k+3)*lda + temp_i];
- temp_i++;
-
-
- temp3 += temp_A0 * B[(k+0)*lda + temp_i];
- temp3 += temp_A1 * B[(k+1)*lda + temp_i];
- temp3 += temp_A2 * B[(k+2)*lda + temp_i];
- temp3 += temp_A3 * B[(k+3)*lda + temp_i];
- temp_i++;
-
- temp4 += temp_A0 * B[(k+0)*lda + temp_i];
- temp4 += temp_A1 * B[(k+1)*lda + temp_i];
- temp4 += temp_A2 * B[(k+2)*lda + temp_i];
- temp4 += temp_A3 * B[(k+3)*lda + temp_i];
- temp_i++;
-
- temp5 += temp_A0 * B[(k+0)*lda + temp_i];
- temp5 += temp_A1 * B[(k+1)*lda + temp_i];
- temp5 += temp_A2 * B[(k+2)*lda + temp_i];
- temp5 += temp_A3 * B[(k+3)*lda + temp_i];
- temp_i++;
-
- temp6 += temp_A0 * B[(k+0)*lda + temp_i];
- temp6 += temp_A1 * B[(k+1)*lda + temp_i];
- temp6 += temp_A2 * B[(k+2)*lda + temp_i];
- temp6 += temp_A3 * B[(k+3)*lda + temp_i];
- temp_i++;
-
-
- temp7 += temp_A0 * B[(k+0)*lda + temp_i];
- temp7 += temp_A1 * B[(k+1)*lda + temp_i];
- temp7 += temp_A2 * B[(k+2)*lda + temp_i];
- temp7 += temp_A3 * B[(k+3)*lda + temp_i];
- temp_i++;
-
- }
-
- C[i + j*lda] = temp0;
- C[(i+1) + j*lda] = temp1;
- C[(i+2) + j*lda] = temp2;
- C[(i+3) + j*lda] = temp3;
- C[(i+4) + j*lda] = temp4;
- C[(i+5) + j*lda] = temp5;
- C[(i+6) + j*lda] = temp6;
- C[(i+7) + j*lda] = temp7;
-
- }
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
- /*
- // Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
-
-
- // verify
- verify(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier();
-
- */
- // Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
-
-#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
-
- exit(0);
-}
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
- int i, j, k , jj , kk;
- int start_i = coreid*lda/2;
- int end_i = start_i + lda/2;
- int step_j, step_k;
- int start_k, end_k, start_j, end_j;
- int j_lda;
- int pos_A , pos_B, pos_C;
- data_t temp00, temp01,temp02,temp03,temp04,temp05,temp06,temp07;
- data_t temp10, temp11,temp12,temp13,temp14,temp15,temp16,temp17;
- data_t temp_A0, temp_A1, temp_A2, temp_A3, temp_A4, temp_A5, temp_A6, temp_A7;
-
-
- if (coreid == 0)
- {
- step_k = 1;
- start_k= 0;
- end_k = lda;
-
- step_j = 2;
- start_j= 0;
- end_j = lda;
-
- }else
- {
-
- step_k = -1;
- start_k = lda-1;
- end_k = -1;
-
- step_j = -2;
- start_j= lda-2;
- end_j = -2;
- }
-
- for( kk = start_k ; kk!= end_k ; kk+=(step_k*16) )
- {
- for( jj = start_j ; jj!= end_j ; jj+=(step_j*8) )
- {
- for ( i = start_i; i < end_i; i+=8 )
- {
- //pos_C = i + jj*lda;
- for ( j = jj; j != (jj+(step_j*8)) ; j+=step_j )
- {
-
- pos_C = i + j*lda;
- temp00 = C[(pos_C + 0)];
- temp01 = C[(pos_C + 1)];
- temp02 = C[(pos_C + 2)];
- temp03 = C[(pos_C + 3)];
- temp04 = C[(pos_C + 4)];
- temp05 = C[(pos_C + 5)];
- temp06 = C[(pos_C + 6)];
- temp07 = C[(pos_C + 7)];
-
- //pos_C += lda;
- pos_C = i + (j+1)*lda;
-
- temp10 = C[(pos_C + 0)];
- temp11 = C[(pos_C + 1)];
- temp12 = C[(pos_C + 2)];
- temp13 = C[(pos_C + 3)];
- temp14 = C[(pos_C + 4)];
- temp15 = C[(pos_C + 5)];
- temp16 = C[(pos_C + 6)];
- temp17 = C[(pos_C + 7)];
-
- pos_B = kk*lda + i;
- pos_A = j*lda + kk;
- for ( k = kk; k != (kk+(step_k*16)) ; k+=step_k )
- {
- temp_A0 = A[ pos_A ] ;
- temp_A1 = A[pos_A +lda];
-
- temp00 += temp_A0 * B[(pos_B + 0)];
- temp01 += temp_A0 * B[(pos_B + 1)];
- temp02 += temp_A0 * B[(pos_B + 2)];
- temp03 += temp_A0 * B[(pos_B + 3)];
- temp04 += temp_A0 * B[(pos_B + 4)];
- temp05 += temp_A0 * B[(pos_B + 5)];
- temp06 += temp_A0 * B[(pos_B + 6)];
- temp07 += temp_A0 * B[(pos_B + 7)];
-
- temp10 += temp_A1 * B[(pos_B + 0)];
- temp11 += temp_A1 * B[(pos_B + 1)];
- temp12 += temp_A1 * B[(pos_B + 2)];
- temp13 += temp_A1 * B[(pos_B + 3)];
- temp14 += temp_A1 * B[(pos_B + 4)];
- temp15 += temp_A1 * B[(pos_B + 5)];
- temp16 += temp_A1 * B[(pos_B + 6)];
- temp17 += temp_A1 * B[(pos_B + 7)];
-
- pos_B += (lda*step_k) ;
- pos_A += step_k;
- }
- //barrier(nc);
-
- C[(pos_C + 0)] = temp10;
- C[(pos_C + 1)] = temp11;
- C[(pos_C + 2)] = temp12;
- C[(pos_C + 3)] = temp13;
- C[(pos_C + 4)] = temp14;
- C[(pos_C + 5)] = temp15;
- C[(pos_C + 6)] = temp16;
- C[(pos_C + 7)] = temp17;
- //barrier(nc);
-
- pos_C = i + j*lda;
- //pos_C -= lda;
- C[(pos_C + 0)] = temp00;
- C[(pos_C + 1)] = temp01;
- C[(pos_C + 2)] = temp02;
- C[(pos_C + 3)] = temp03;
- C[(pos_C + 4)] = temp04;
- C[(pos_C + 5)] = temp05;
- C[(pos_C + 6)] = temp06;
- C[(pos_C + 7)] = temp07;
- //barrier(nc);
- //pos_C += step_j * lda;
- }
- //barrier(nc);
- }
- //barrier(nc);
-
- }
- //barrier(nc);
- }
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
- /*
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-
- */
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
-
- //printf("input1_data");
-exit(0);
-
-}
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- size_t i;
- size_t chunk_size = n/ncores;
- size_t start = chunk_size * coreid;
- size_t end = start + chunk_size;
-
- for( i = start ; i < end; i++ )
- {
- x[i]=x[i]+y[i];
- }
- // ***************************** //
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+// Multi-core matrix multiply: accumulates A * B into C (row-major, lda x lda).
+//
+// Phase 1: B is transposed into a static scratch buffer. Each core copies the
+//          columns of B that fall in its own band, and barrier(ncores) is
+//          called after every k step.
+// Phase 2: each core walks its own band of C rows and, for eight output
+//          columns at a time, accumulates the dot products of the A row with
+//          the eight matching rows of the transposed B.
+//
+// coreid - index of the calling core
+// ncores - number of cores taking part
+// lda    - matrix dimension
+// A, B   - input matrices
+// C      - output matrix; results are added to its existing contents
+//
+// NOTE(review): the scratch buffer has 1024 elements and the column chunking
+// uses a literal 32, so this appears specialised to lda == 32 -- confirm
+// against the caller before using other sizes.
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+    static data_t BB[1024];                     // transposed copy of B
+    const int band   = lda / ncores;            // rows/columns owned per core
+    const int own_lo = coreid * band;
+    const int own_hi = (coreid + 1) * band;
+
+    // Phase 1: transpose this core's band of B columns into rows of BB.
+    for ( int kk = 0; kk < lda; kk++ ) {
+        for ( int col = own_lo; col < own_hi; col++ ) {
+            BB[col*lda + kk] = B[kk*lda + col];
+        }
+        barrier(ncores);
+    }
+
+    // Phase 2: walk the output columns in ncores chunks of 32/ncores each.
+    for ( int chunk = 0; chunk < ncores; chunk++ ) {
+        const int col_lo = chunk * (32 / ncores);
+        const int col_hi = (chunk + 1) * (32 / ncores);
+
+        for ( int col = col_lo; col < col_hi; col += 8 ) {
+            for ( int row = own_lo; row < own_hi; row++ ) {
+                const data_t *a_row = &A[row*lda];
+                data_t acc[8] = {0};
+
+                for ( int kk = 0; kk < lda; kk += 8 ) {
+                    // acc[u] is the running dot product for column col+u;
+                    // products are added in ascending k order.
+                    for ( int u = 0; u < 8; u++ ) {
+                        const data_t *bt = &BB[(col + u)*lda];
+                        for ( int v = 0; v < 8; v++ ) {
+                            acc[u] += a_row[kk + v] * bt[kk + v];
+                        }
+                    }
+                }
+
+                for ( int u = 0; u < 8; u++ ) {
+                    C[col + u + row*lda] += acc[u];
+                }
+            }
+        }
+    }
+}
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+// Adds to two rows of C the partial products contributed by sixteen
+// consecutive k values starting at k_lo.
+//
+// row, row2 - element offsets of the two rows inside A and C
+// k_lo      - first k index of the 16-wide slice (processed four at a time)
+static void matmul_rowpair_kslice(const data_t A[], const data_t B[], data_t C[], const int row, const int row2, const int k_lo)
+{
+    data_t sum_top[32] = {0};
+    data_t sum_bot[32] = {0};
+
+    for ( int kk = k_lo; kk < k_lo + 16; kk += 4 ) {
+        // Four A values from each of the two rows.
+        const data_t t0 = A[row + kk];
+        const data_t t1 = A[row + kk + 1];
+        const data_t t2 = A[row + kk + 2];
+        const data_t t3 = A[row + kk + 3];
+        const data_t u0 = A[row2 + kk];
+        const data_t u1 = A[row2 + kk + 1];
+        const data_t u2 = A[row2 + kk + 2];
+        const data_t u3 = A[row2 + kk + 3];
+
+        // The four matching rows of B (row stride fixed at 32).
+        const data_t *b0 = &B[kk*32];
+        const data_t *b1 = &B[(kk + 1)*32];
+        const data_t *b2 = &B[(kk + 2)*32];
+        const data_t *b3 = &B[(kk + 3)*32];
+
+        for ( int col = 0; col < 32; col++ ) {
+            sum_top[col] += t0*b0[col] + t1*b1[col] + t2*b2[col] + t3*b3[col];
+            sum_bot[col] += u0*b0[col] + u1*b1[col] + u2*b2[col] + u3*b3[col];
+        }
+    }
+
+    for ( int col = 0; col < 32; col++ ) {
+        C[row + col]  += sum_top[col];
+        C[row2 + col] += sum_bot[col];
+    }
+}
+
+// Two-core matrix multiply for 32x32 row-major matrices: C += A * B.
+//
+// The k dimension is split in half: core 0 adds the k = 0..15 contribution
+// to every row of C, walking rows top to bottom, and core 1 adds the
+// k = 16..31 contribution, walking rows bottom to top. When ncores == 1,
+// core 0 performs both passes. Cores with coreid > 1 return immediately.
+//
+// coreid - index of the calling core
+// ncores - number of cores taking part
+// lda    - not used by this implementation; all sizes are the literal 32
+// A, B   - input matrices
+// C      - output matrix; results are added to its existing contents
+//
+// NOTE(review): with two cores, both add into every row of C and nothing in
+// this function synchronises those updates; the opposite traversal orders
+// presumably keep the cores on different rows -- confirm this is sufficient.
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+    if ( coreid > 1 ) {
+        return;
+    }
+
+    if ( coreid == 0 ) {
+        for ( int r = 0; r < 32; r += 2 ) {
+            matmul_rowpair_kslice(A, B, C, r*32, (r + 1)*32, 0);
+        }
+    }
+
+    if ( coreid == 1 || ncores == 1 ) {
+        for ( int r = 0; r < 32; r += 2 ) {
+            matmul_rowpair_kslice(A, B, C, (31 - r)*32, (31 - r - 1)*32, 16);
+        }
+    }
+}
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+// Multi-core matrix multiply: accumulates A * B into C (row-major, lda x lda).
+//
+// Output columns are processed two at a time (i, i+1) and the k dimension
+// four at a time. For each (i, k) step the eight B values involved are
+// loaded once and reused for every row in the calling core's band
+// [coreid*(lda/ncores), (coreid+1)*(lda/ncores)), four rows per pass.
+// barrier(ncores) is called once per (i, k) step.
+//
+// coreid - index of the calling core
+// ncores - number of cores taking part
+// lda    - matrix dimension; the loops step by 2 (columns) and 4 (k, rows)
+//          without remainder handling, so lda is expected to be a multiple
+//          of 4 and lda/ncores a multiple of 4
+// A, B   - input matrices
+// C      - output matrix; results are added to its existing contents
+//
+// The B values and the running sums are kept as data_t. They were previously
+// held in int locals, which truncates every value and partial sum whenever
+// data_t is a floating-point type.
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+    int i, j, k, r;
+
+    for ( i = 0; i < lda; i += 2 )
+    {
+        for ( k = 0; k < lda; k += 4 )
+        {
+            // d* feed output column i, c* feed output column i+1.
+            const data_t d0 = B[k*lda + i];
+            const data_t c0 = B[k*lda + i + 1];
+            const data_t d1 = B[(k+1)*lda + i];
+            const data_t c1 = B[(k+1)*lda + i + 1];
+            const data_t d2 = B[(k+2)*lda + i];
+            const data_t c2 = B[(k+2)*lda + i + 1];
+            const data_t d3 = B[(k+3)*lda + i];
+            const data_t c3 = B[(k+3)*lda + i + 1];
+
+            for ( j = coreid*(lda/ncores); j < (coreid+1)*(lda/ncores); j += 4 )
+            {
+                // Four consecutive rows, two output columns each.
+                for ( r = 0; r < 4; r++ )
+                {
+                    const data_t *a = &A[(j + r)*lda + k];
+                    data_t sum;
+
+                    sum  = a[0] * d0;
+                    sum += a[1] * d1;
+                    sum += a[2] * d2;
+                    sum += a[3] * d3;
+                    C[(j + r)*lda + i] += sum;
+
+                    sum  = a[0] * c0;
+                    sum += a[1] * c1;
+                    sum += a[2] * c2;
+                    sum += a[3] * c3;
+                    C[(j + r)*lda + i + 1] += sum;
+                }
+            }
+            barrier(ncores);
+        }
+    }
+}
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+// Multi-core matrix multiply (the variant labelled "MI" in the original):
+// C = A * B for row-major lda x lda matrices.
+//
+// Each core computes the rows starting at coreid*lda/ncores, lda/ncores of
+// them, four output columns at a time. Core 0 walks the k dimension upwards;
+// every other core walks it downwards. An earlier variant labelled "MSI"
+// was identical except that all cores walked k upwards.
+//
+// coreid - index of the calling core
+// ncores - number of cores taking part
+// lda    - matrix dimension; columns are stepped by 4 with no remainder loop
+// A, B   - input matrices
+// C      - output matrix; each computed element is overwritten, not added to
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+    const int row_first = coreid*lda/ncores;
+    const int row_end   = row_first + lda/ncores;
+
+    // Direction in which this core traverses k.
+    const int k_first = (coreid == 0) ? 0 : lda - 1;
+    const int k_step  = (coreid == 0) ? 1 : -1;
+
+    barrier(ncores);
+
+    for ( int row = row_first; row < row_end; row++ ) {
+        for ( int col = 0; col < lda; col += 4 ) {
+            data_t s0 = 0;
+            data_t s1 = 0;
+            data_t s2 = 0;
+            data_t s3 = 0;
+            int kk = k_first;
+
+            for ( int n = 0; n < lda; n++, kk += k_step ) {
+                s0 += A[row*lda + kk] * B[kk*lda + col];
+                s1 += A[row*lda + kk] * B[kk*lda + col + 1];
+                s2 += A[row*lda + kk] * B[kk*lda + col + 2];
+                s3 += A[row*lda + kk] * B[kk*lda + col + 3];
+            }
+
+            C[row*lda + col]     = s0;
+            C[row*lda + col + 1] = s1;
+            C[row*lda + col + 2] = s2;
+            C[row*lda + col + 3] = s3;
+        }
+    }
+}
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+// Tiled multi-core matrix multiply: accumulates A * B into C (row-major,
+// lda x lda).
+//
+// C is covered in 16x16 tiles. Core c owns the tile rows starting at 16*c,
+// 16*(c + ncores), ... For each tile row it visits the tile columns in
+// wrap-around order beginning at column 16*c, so cores start on different
+// columns. Inside a tile, a 4-row by 2-column group of C elements is held in
+// locals while the whole k range is accumulated, then written back.
+//
+// coreid - index of the calling core
+// ncores - number of cores taking part
+// lda    - matrix dimension; rows are stepped by 4, columns by 2 and k by 8
+//          with no remainder handling
+// A, B   - input matrices
+// C      - output matrix; results are added to its existing contents
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+    const int tile   = 16;
+    const int origin = tile * coreid;
+
+    for ( int tile_row = origin; tile_row < lda; tile_row += tile*ncores ) {
+        int tile_col = origin;
+
+        // Visit every tile column once, wrapping around past lda.
+        do {
+            for ( int row = tile_row; row < lda && row < tile_row + tile; row += 4 ) {
+                for ( int col = tile_col; col < lda && col < tile_col + tile; col += 2 ) {
+                    data_t acc[4][2];
+
+                    // Start from the current contents of C.
+                    for ( int r = 0; r < 4; r++ ) {
+                        acc[r][0] = C[col + (row + r)*lda];
+                        acc[r][1] = C[col + (row + r)*lda + 1];
+                    }
+
+                    for ( int kb = 0; kb < lda; kb += 8 ) {
+                        for ( int ko = 0; ko < 8; ko++ ) {
+                            const int kk = kb + ko;
+                            const data_t b_left  = B[kk*lda + col];
+                            const data_t b_right = B[kk*lda + col + 1];
+
+                            for ( int r = 0; r < 4; r++ ) {
+                                const data_t a_val = A[(row + r)*lda + kk];
+                                acc[r][0] += a_val * b_left;
+                                acc[r][1] += a_val * b_right;
+                            }
+                        }
+                    }
+
+                    for ( int r = 0; r < 4; r++ ) {
+                        C[col + (row + r)*lda]     = acc[r][0];
+                        C[col + (row + r)*lda + 1] = acc[r][1];
+                    }
+                }
+            }
+
+            tile_col = (tile + tile_col) % lda;
+        } while ( tile_col != origin );
+    }
+}
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+// Accumulates A * B into a 16-column-wide strip of C for a range of rows.
+//
+// For every row in [row_begin, row_end) the sixteen C elements in columns
+// col_base .. col_base+15 are loaded, the products over k = 0..31 are added
+// in ascending k order, and the results are stored back.
+static void matmul_strip16(const int lda, const data_t A[], const data_t B[], data_t C[], const int row_begin, const int row_end, const int col_base)
+{
+    for ( int row = row_begin; row < row_end; row++ ) {
+        data_t acc[16];
+
+        for ( int c = 0; c < 16; c++ ) {
+            acc[c] = C[col_base + c + row*lda];
+        }
+
+        for ( int kk = 0; kk < 32; kk++ ) {
+            const data_t a_val = A[row*lda + kk];
+            for ( int c = 0; c < 16; c++ ) {
+                acc[c] += a_val * B[col_base + c + kk*lda];
+            }
+        }
+
+        for ( int c = 0; c < 16; c++ ) {
+            C[col_base + c + row*lda] = acc[c];
+        }
+    }
+}
+
+// Two-core matrix multiply: accumulates A * B into C (row-major).
+//
+// The output columns are split in half. Core 0 handles columns 0..15 for
+// rows 0..31. Core 1 handles columns 16..31, doing rows 16..31 first and
+// rows 0..15 second. When ncores == 1, core 0 performs both halves. Cores
+// with coreid > 1 return immediately.
+//
+// coreid - index of the calling core
+// ncores - number of cores taking part
+// lda    - row stride of the matrices; loop bounds are the literals 16 and
+//          32, so this appears to assume lda == 32 (confirm with the caller)
+// A, B   - input matrices
+// C      - output matrix; results are added to its existing contents
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+    if ( coreid > 1 ) {
+        return;
+    }
+
+    if ( coreid == 0 ) {
+        matmul_strip16(lda, A, B, C, 0, 32, 0);
+    }
+
+    if ( coreid == 1 || ncores == 1 ) {
+        matmul_strip16(lda, A, B, C, 16, 32, 16);
+        matmul_strip16(lda, A, B, C, 0, 16, 16);
+    }
+}
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+// Adds to two rows of C the partial products contributed by sixteen
+// consecutive k values starting at k_lo.
+//
+// row, row2 - element offsets of the two rows inside A and C
+// k_lo      - first k index of the 16-wide slice (processed four at a time)
+static void matmul_pair_half_k(const data_t A[], const data_t B[], data_t C[], const int row, const int row2, const int k_lo)
+{
+    data_t part_a[32] = {0};
+    data_t part_b[32] = {0};
+
+    for ( int kk = k_lo; kk < k_lo + 16; kk += 4 ) {
+        // Four A values from each of the two rows.
+        const data_t p0 = A[row + kk];
+        const data_t p1 = A[row + kk + 1];
+        const data_t p2 = A[row + kk + 2];
+        const data_t p3 = A[row + kk + 3];
+        const data_t q0 = A[row2 + kk];
+        const data_t q1 = A[row2 + kk + 1];
+        const data_t q2 = A[row2 + kk + 2];
+        const data_t q3 = A[row2 + kk + 3];
+
+        // The four matching rows of B (row stride fixed at 32).
+        const data_t *b0 = &B[kk*32];
+        const data_t *b1 = &B[(kk + 1)*32];
+        const data_t *b2 = &B[(kk + 2)*32];
+        const data_t *b3 = &B[(kk + 3)*32];
+
+        for ( int col = 0; col < 32; col++ ) {
+            part_a[col] += p0*b0[col] + p1*b1[col] + p2*b2[col] + p3*b3[col];
+            part_b[col] += q0*b0[col] + q1*b1[col] + q2*b2[col] + q3*b3[col];
+        }
+    }
+
+    // Written back once the whole 16-wide k slice has been accumulated.
+    for ( int col = 0; col < 32; col++ ) {
+        C[row + col]  += part_a[col];
+        C[row2 + col] += part_b[col];
+    }
+}
+
+// Two-core matrix multiply: accumulates A * B into C (row-major).
+//
+// The k dimension is split in half: core 0 adds the k = 0..15 contribution
+// to each row pair, walking rows top to bottom, and core 1 adds the
+// k = 16..31 contribution, walking rows bottom to top. When ncores == 1,
+// core 0 performs both passes. Other cores do nothing.
+//
+// coreid - index of the calling core
+// ncores - number of cores taking part
+// lda    - row stride of A and C; B's stride and the column count are the
+//          literal 32, so this appears to assume lda == 32 (confirm)
+// A, B   - input matrices
+// C      - output matrix; results are added to its existing contents
+//
+// NOTE(review): with two cores, both add into every row of C and nothing in
+// this function synchronises those updates; the opposite traversal orders
+// presumably keep the cores on different rows -- confirm this is sufficient.
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+    if ( coreid == 0 ) {
+        for ( int r = 0; r < lda; r += 2 ) {
+            matmul_pair_half_k(A, B, C, r*lda, (r + 1)*lda, 0);
+        }
+    }
+
+    if ( coreid == 1 || ncores == 1 ) {
+        for ( int r = 0; r < 32; r += 2 ) {
+            matmul_pair_half_k(A, B, C, (31 - r)*lda, (31 - r - 1)*lda, 16);
+        }
+    }
+}
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+// Multi-core matrix multiply: C = A * B for row-major lda x lda matrices.
+//
+// Rows are interleaved across cores. On each pass a core computes two rows,
+// j and j + ncores, starting at j = coreid and advancing by 2*ncores, so the
+// B column read for output column i is shared by both dot products.
+// barrier(ncores) is called after every output column.
+//
+// coreid - index of the calling core
+// ncores - number of cores taking part
+// lda    - matrix dimension; row j + ncores is accessed without a bounds
+//          check, so lda is expected to be a multiple of 2*ncores
+// A, B   - input matrices
+// C      - output matrix; each computed element is overwritten, not added to
+//
+// The two dot-product accumulators are data_t. They were previously int,
+// which truncates every partial sum whenever data_t is a floating-point
+// type.
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+    int i, j, k;
+    data_t c1, c2;
+
+    for ( j = coreid; j < lda; j += 2*ncores ) {
+        for ( i = 0; i < lda; i += 1 ) {
+            c1 = 0;   // dot product for row j
+            c2 = 0;   // dot product for row j + ncores
+            for ( k = 0; k < lda; k += 1 ) {
+                c1 += A[j * lda + k] * B[k*lda + i];
+                c2 += A[(j+ncores) * lda + k] * B[k*lda + i];
+            }
+
+            C[i + j * lda] = c1;
+            C[i + (j+ncores) * lda] = c2;
+            barrier(ncores);
+        }
+    }
+
+}
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+/*
+ * Accumulates rows [row_begin, row_end) of C += A * B for row-major matrices
+ * with row stride lda.  The column and depth extents are fixed at 32
+ * (4 tiles of 8), exactly as in the hand-unrolled code this replaces.
+ *
+ * For each row, 8 consecutive A values are staged in a_tile; each one is then
+ * multiplied against an 8-wide strip of the matching B row (staged in b_tile
+ * before C is touched).  Every C element still receives its products in
+ * ascending k order, so the floating-point result is unchanged.
+ */
+static void matmul_row_range(const int lda, const data_t A[], const data_t B[], data_t C[],
+                             const int row_begin, const int row_end)
+{
+    enum { TILE = 8, NTILES = 4 };  /* 4 tiles of 8 span the 32 columns */
+    data_t a_tile[TILE];
+    data_t b_tile[TILE];
+    int row, m, n, kk, col;
+
+    for (row = row_begin; row < row_end; row++) {
+        for (m = 0; m < NTILES; m++) {
+            /* Stage A[row][8m .. 8m+7]. */
+            for (kk = 0; kk < TILE; kk++) {
+                a_tile[kk] = A[row*lda + kk + TILE*m];
+            }
+
+            for (n = 0; n < NTILES; n++) {
+                for (kk = 0; kk < TILE; kk++) {
+                    /* Stage B[8m+kk][8n .. 8n+7]. */
+                    for (col = 0; col < TILE; col++) {
+                        b_tile[col] = B[(kk + TILE*m)*lda + col + TILE*n];
+                    }
+                    /* C[row][8n .. 8n+7] += A[row][8m+kk] * B[8m+kk][8n .. 8n+7] */
+                    for (col = 0; col < TILE; col++) {
+                        C[col + TILE*n + row*lda] += a_tile[kk] * b_tile[col];
+                    }
+                }
+            }
+        }
+    }
+}
+
+/*
+ * Two-core matrix multiply, C += A * B, for 32x32 row-major matrices.
+ *
+ *   coreid - id of the calling core
+ *   ncores - number of participating cores
+ *   lda    - row stride of A, B and C (the loop extents assume 32)
+ *
+ * Core 0 produces rows 0..15 and core 1 produces rows 16..31.  When
+ * ncores == 1 the single core does both halves (16..31 first, then 0..15).
+ * Cores with any other id do no work.
+ *
+ * NOTE(review): the original author's note recorded "MSI 95k" for the
+ * hand-unrolled version (presumably cycles); re-measure after this refactor.
+ */
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+    enum { HALF = 16, DIM = 32 };
+
+    if (coreid == 1 || ncores == 1) {
+        matmul_row_range(lda, A, B, C, HALF, DIM);
+    }
+    if (coreid == 0) {
+        matmul_row_range(lda, A, B, C, 0, HALF);
+    }
+}
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"\r
+/*
+ * Two-core matrix multiply, C = A * B, for 32x32 row-major matrices.
+ *
+ *   coreid - id of the calling core; only cores 0 and 1 do any work
+ *   ncores - number of participating cores
+ *   lda    - depth of the dot products (the index math assumes 32)
+ *
+ * C is treated as a flat array of 1024 elements: core 0 writes elements
+ * 0..511 and core 1 writes 512..1023 (a single core writes all 1024).
+ * Results are produced in strips of 8 consecutive elements and OVERWRITE C;
+ * any previous contents of C are ignored.
+ *
+ * A core other than core 0 first copies A and B into the static arrays
+ * A1/B1 and reads from those instead.
+ * NOTE(review): presumably this keeps core 1 from sharing cache lines with
+ * core 0 -- confirm against the coherence protocol being measured.
+ */
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+    enum { ROW_SHIFT = 5, ROW_LEN = 32, HALF_SHIFT = 9, ELEMS = 1024, STRIP = 8 };
+    static data_t A1[ELEMS], B1[ELEMS];
+    const data_t *a_src = A;
+    const data_t *b_src = B;
+    data_t acc[STRIP];
+    int start, end, j, k, u, x, row_base, col;
+
+    if (coreid > 1) {
+        return;
+    }
+
+    /* Each core owns 512 consecutive elements of C; a lone core owns all. */
+    start = coreid << HALF_SHIFT;
+    end = ((ncores == 1) ? 2 : (coreid + 1)) << HALF_SHIFT;
+
+    if (coreid != 0) {
+        for (k = 0; k < ELEMS; k++) {
+            A1[k] = A[k];
+            B1[k] = B[k];
+        }
+        a_src = A1;
+        b_src = B1;
+    }
+
+    for (j = start; j < end; j += STRIP) {
+        row_base = (j >> ROW_SHIFT) << ROW_SHIFT;   /* index of the row's first element */
+        col = j % ROW_LEN;                          /* first column of this strip */
+
+        for (x = 0; x < STRIP; x++) {
+            acc[x] = 0;
+        }
+
+        /* Depth loop advances two k values per trip, as the original did. */
+        for (k = 0; k < lda; k += 2) {
+            for (u = 0; u < 2; u++) {
+                const data_t a = a_src[k + u + row_base];
+                const data_t *b_row = &b_src[col + ((k + u) << ROW_SHIFT)];
+                for (x = 0; x < STRIP; x++) {
+                    acc[x] += a * b_row[x];
+                }
+            }
+        }
+
+        for (x = 0; x < STRIP; x++) {
+            C[j + x] = acc[x];
+        }
+    }
+}
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+
+#define REG_I 8
+#define REG_J 2
+#define BLOCK_I 32
+#define BLOCK_J 16
+#define BLOCK_K 16
+#define LDA 32
+#define NCORES 2
+/* Arguments and result are parenthesized so expression arguments stay safe. */
+#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
+
+/*
+ * Adds the partial product over k in [k_begin, k_end) into rows
+ * [row_begin, row_end) of C, for LDA x LDA row-major matrices.
+ *
+ * Rows are walked in BLOCK_J chunks and k in BLOCK_K chunks.  Inside a chunk
+ * the work is done on a REG_J-row by REG_I-column block of C that is loaded
+ * into c[][], updated for every k in the chunk, and stored back.
+ */
+static void matmul_k_pass(const data_t A[], const data_t B[], data_t C[],
+                          const int row_begin, const int row_end,
+                          const int k_begin, const int k_end)
+{
+    int i, j, k, ri, rj, jj, kk;
+    const data_t *Aj, *Bi;
+    data_t *Cj;
+    data_t c[REG_I][REG_J], a[REG_J], b[REG_I];
+
+    for (jj = row_begin; jj < row_end; jj += BLOCK_J) {
+        for (kk = k_begin; kk < k_end; kk += BLOCK_K) {
+            for (j = jj; j < MIN(row_end, jj + BLOCK_J); j += REG_J) {
+                Aj = A + j*LDA;
+                Cj = C + j*LDA;
+                for (i = 0; i < LDA; i += REG_I) {
+                    Bi = B + i;
+
+                    /* Load the C register block. */
+                    for (ri = 0; ri < REG_I; ri++) {
+                        for (rj = 0; rj < REG_J; rj++) {
+                            c[ri][rj] = Cj[i + ri + rj*LDA];
+                        }
+                    }
+
+                    for (k = kk; k < MIN(LDA, kk + BLOCK_K); k++) {
+                        for (ri = 0; ri < REG_I; ri++) {
+                            b[ri] = Bi[k*LDA + ri];
+                        }
+                        /* Update the C register block with this k. */
+                        for (rj = 0; rj < REG_J; rj++) {
+                            a[rj] = Aj[rj*LDA + k];
+                            for (ri = 0; ri < REG_I; ri++) {
+                                c[ri][rj] += a[rj] * b[ri];
+                            }
+                        }
+                    }
+
+                    /* Store the C register block. */
+                    for (ri = 0; ri < REG_I; ri++) {
+                        for (rj = 0; rj < REG_J; rj++) {
+                            Cj[i + ri + rj*LDA] = c[ri][rj];
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+/*
+ * Multi-core matrix multiply, C += A * B, on row-major matrices.
+ *
+ *   coreid - id of the calling core
+ *   ncores - number of participating cores
+ *   lda    - matrix dimension and row stride
+ *
+ * Fast path (ncores == NCORES and lda == LDA): each core owns LDA/NCORES
+ * rows of C and accumulates them in two passes over k.  Core 0 handles the
+ * lower half of k first and the upper half second; the other core uses the
+ * opposite order, so the two cores read different halves of B during the
+ * same pass.  No barrier separates the passes (the original had one
+ * commented out).
+ *
+ * Any other configuration falls back to a naive triple loop run by core 0
+ * alone; all other cores return immediately.
+ */
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+    int i, j, k;
+
+    if (ncores == NCORES && lda == LDA) {
+        const int half = LDA / 2;
+        const int row_begin = coreid * (LDA / NCORES);
+        const int row_end = (coreid == NCORES - 1) ? LDA : (coreid + 1) * (LDA / NCORES);
+        const int first_k = (coreid == 0) ? 0 : half;
+        const int second_k = (coreid != 0) ? 0 : half;
+
+        matmul_k_pass(A, B, C, row_begin, row_end, first_k, first_k + half);
+        matmul_k_pass(A, B, C, row_begin, row_end, second_k, second_k + half);
+    } else {
+        /* Only 32x32 on 2 cores is tuned; everything else is naive. */
+        if (coreid > 0) {
+            return;
+        }
+
+        for (i = 0; i < lda; i++) {
+            for (j = 0; j < lda; j++) {
+                for (k = 0; k < lda; k++) {
+                    C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
+                }
+            }
+        }
+    }
+}
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+/*
+ * Accumulates one 16x16 quadrant of C += A * B for row-major matrices with
+ * row stride lda.  The quadrant covers rows [row0, row0+16) and columns
+ * [col0, col0+16); the dot products run over k = 0..31.
+ *
+ * For each row the 16 running sums are loaded from C, updated for every k in
+ * ascending order, and written back once.
+ */
+static void matmul_quadrant(const int lda, const data_t A[], const data_t B[], data_t C[],
+                            const int row0, const int col0)
+{
+    enum { QUAD = 16, DEPTH = 32 };
+    data_t acc[QUAD];
+    int j, k, c;
+
+    for (j = row0; j < row0 + QUAD; j++) {
+        for (c = 0; c < QUAD; c++) {
+            acc[c] = C[col0 + c + j*lda];
+        }
+        for (k = 0; k < DEPTH; k++) {
+            const data_t a = A[j*lda + k];
+            for (c = 0; c < QUAD; c++) {
+                acc[c] += a * B[col0 + c + k*lda];
+            }
+        }
+        for (c = 0; c < QUAD; c++) {
+            C[col0 + c + j*lda] = acc[c];
+        }
+    }
+}
+
+/*
+ * Two-core matrix multiply, C += A * B, for 32x32 row-major matrices.
+ *
+ *   coreid - id of the calling core
+ *   ncores - number of participating cores
+ *   lda    - row stride of A, B and C (the loop extents assume 32)
+ *
+ * Core 0 produces columns 0..15 of every row (rows 0..15 first, then
+ * 16..31).  Core 1 produces columns 16..31 (rows 16..31 first, then 0..15),
+ * so the two cores start on different rows of A.  When ncores == 1 the
+ * single core does all four quadrants.  Cores with any other id do no work.
+ */
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+    enum { HALF = 16 };
+
+    if (coreid == 0) {
+        matmul_quadrant(lda, A, B, C, 0, 0);
+        matmul_quadrant(lda, A, B, C, HALF, 0);
+    }
+    if (coreid == 1 || ncores == 1) {
+        matmul_quadrant(lda, A, B, C, HALF, HALF);
+        matmul_quadrant(lda, A, B, C, 0, HALF);
+    }
+}
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+/*
+ * Multi-core matrix multiply, C += A * B, on lda x lda row-major matrices.
+ *
+ *   coreid - id of the calling core
+ *   ncores - number of participating cores
+ *   lda    - matrix dimension and row stride
+ *
+ * Each core owns lda/ncores consecutive rows of C; the last core runs to the
+ * end of the matrix.  Work is done in 4x4 blocks of C held in acc[][], so
+ * only the leading lda/4*4 rows and columns are processed.
+ *
+ * Before computing, the cores jointly copy B into the static array B1 (the
+ * last core copies the first lda*lda/ncores elements, every other core
+ * copies the rest) and then meet at a barrier.  The last core reads B1
+ * during the multiply; all other cores read B itself.
+ * NOTE(review): presumably this is meant to keep the last core off the cache
+ * lines the other cores read -- confirm.
+ *
+ * B1 holds 32*32 elements.  If lda*lda exceeds that, the copy is skipped and
+ * every core reads B directly instead of writing past the end of B1.  The
+ * decision depends only on lda, so all cores still reach the barrier.
+ */
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+    enum { B1_CAPACITY = 32*32, RB = 4 };
+    static data_t B1[B1_CAPACITY];
+
+    const int is_last = (coreid == ncores - 1);
+    const int space = lda / ncores;
+    const int elems = lda * lda;
+    const int use_copy = (elems <= B1_CAPACITY);
+    const int row_begin = space * coreid;
+    const int row_end = is_last ? lda / RB * RB : (row_begin + space) / RB * RB;
+    const int col_end = lda / RB * RB;
+    const data_t *b_src;
+    data_t acc[RB][RB];
+    int i, j, k, r, c;
+
+    if (use_copy) {
+        const int copy_begin = is_last ? 0 : elems / ncores;
+        const int copy_end = is_last ? elems / ncores : elems;
+        for (i = copy_begin; i < copy_end; i++) {
+            B1[i] = B[i];
+        }
+    }
+
+    barrier(ncores);
+
+    b_src = (is_last && use_copy) ? B1 : B;
+
+    for (i = row_begin; i < row_end; i += RB) {
+        for (j = 0; j < col_end; j += RB) {
+            /* Load the 4x4 block C[i..i+3][j..j+3]. */
+            for (r = 0; r < RB; r++) {
+                for (c = 0; c < RB; c++) {
+                    acc[r][c] = C[j + c + (i + r)*lda];
+                }
+            }
+
+            for (k = 0; k < lda; k++) {
+                for (c = 0; c < RB; c++) {
+                    const data_t b = b_src[j + c + k*lda];
+                    for (r = 0; r < RB; r++) {
+                        acc[r][c] += A[k + (i + r)*lda] * b;
+                    }
+                }
+            }
+
+            /* Store the block back. */
+            for (r = 0; r < RB; r++) {
+                for (c = 0; c < RB; c++) {
+                    C[j + c + (i + r)*lda] = acc[r][c];
+                }
+            }
+        }
+    }
+}
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+/*
+ * Multi-core matrix multiply, C = A * B, on lda x lda row-major matrices.
+ *
+ *   coreid - id of the calling core
+ *   ncores - number of participating cores
+ *   lda    - matrix dimension and row stride
+ *
+ * Rows of C are split into contiguous bands of lda/ncores rows, one band per
+ * core.  The last core also takes any rows left over when ncores does not
+ * divide lda, so no row is skipped.
+ *
+ * Each element is computed as a fresh dot product and OVERWRITES C; previous
+ * contents of C are ignored.
+ */
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+    const int rows_per_core = lda / ncores;
+    const int j_start = coreid * rows_per_core;
+    const int j_end = (coreid == ncores - 1) ? lda : j_start + rows_per_core;
+    int i, j, k;
+
+    for (i = 0; i < lda; i++) {
+        const data_t *B_col = B + i;            /* top of column i of B */
+        for (j = j_start; j < j_end; j++) {
+            const data_t *A_row = A + j*lda;    /* start of row j of A */
+            data_t acc = 0;
+            for (k = 0; k < lda; k++) {
+                acc += A_row[k] * B_col[k*lda];
+            }
+            C[i + j*lda] = acc;
+        }
+    }
+}
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+
+ // ***************************** //
+ // **** ADD YOUR CODE HERE ***** //
+ // ***************************** //
+ //
+ // feel free to make a separate function for MI and MSI versions.
+ int j2, i2, k2, j, i, k;
+ int tmpC00, tmpC01, tmpC02, tmpC03, tmpC04, tmpC05, tmpC06, tmpC07;
+ int tmpC10, tmpC11, tmpC12, tmpC13, tmpC14, tmpC15, tmpC16, tmpC17;
+ int jBLOCK = 32;
+ int iBLOCK = 16;
+ int kBLOCK = 32;
+ static __thread int tB[4096]; //__thread
+ int startInd = coreid*(lda/ncores);
+ int endInd = (coreid+1)*(lda/ncores);
+
+ //tranpose B (block?)
+ for (i = 0; i < lda; i += 2) {
+ for (j = startInd; j < endInd; j += 2) {
+ tB[j*lda + i] = B[i*lda + j];
+ tB[(j + 1)*lda + i] = B[i*lda + j + 1];
+ tB[j*lda + i + 1] = B[(i + 1)*lda + j];
+ tB[(j + 1)*lda + i + 1] = B[(i + 1)*lda + j + 1];
+ }
+ barrier(ncores);
+ }
+
+ // compute C[j*n + i] += A[j*n + k] + Btranspose[i*n + k]
+ for ( j2 = 0; j2 < lda; j2 += jBLOCK )
+ for ( i2 = startInd; i2 < endInd; i2 += iBLOCK )
+ for ( j = j2; j < j2 + jBLOCK; j += 2 )
+ for ( k2 = 0; k2 < lda; k2 += kBLOCK )
+ for ( i = i2; i < i2 + iBLOCK; i += 8) {
+ tmpC00 = C[j*lda + i + 0]; tmpC10 = C[(j + 1)*lda + i + 0];
+ tmpC01 = C[j*lda + i + 1]; tmpC11 = C[(j + 1)*lda + i + 1];
+ tmpC02 = C[j*lda + i + 2]; tmpC12 = C[(j + 1)*lda + i + 2];
+ tmpC03 = C[j*lda + i + 3]; tmpC13 = C[(j + 1)*lda + i + 3];
+ tmpC04 = C[j*lda + i + 4]; tmpC14 = C[(j + 1)*lda + i + 4];
+ tmpC05 = C[j*lda + i + 5]; tmpC15 = C[(j + 1)*lda + i + 5];
+ tmpC06 = C[j*lda + i + 6]; tmpC16 = C[(j + 1)*lda + i + 6];
+ tmpC07 = C[j*lda + i + 7]; tmpC17 = C[(j + 1)*lda + i + 7];
+ for ( k = k2; k < k2 + kBLOCK; k += 4) {
+ tmpC00 += A[j*lda + k] * tB[(i + 0)*lda + k];
+ tmpC01 += A[j*lda + k] * tB[(i + 1)*lda + k];
+ tmpC02 += A[j*lda + k] * tB[(i + 2)*lda + k];
+ tmpC03 += A[j*lda + k] * tB[(i + 3)*lda + k];
+ tmpC04 += A[j*lda + k] * tB[(i + 4)*lda + k];
+ tmpC05 += A[j*lda + k] * tB[(i + 5)*lda + k];
+ tmpC06 += A[j*lda + k] * tB[(i + 6)*lda + k];
+ tmpC07 += A[j*lda + k] * tB[(i + 7)*lda + k];
+ tmpC10 += A[(j + 1)*lda + k] * tB[(i + 0)*lda + k];
+ tmpC11 += A[(j + 1)*lda + k] * tB[(i + 1)*lda + k];
+ tmpC12 += A[(j + 1)*lda + k] * tB[(i + 2)*lda + k];
+ tmpC13 += A[(j + 1)*lda + k] * tB[(i + 3)*lda + k];
+ tmpC14 += A[(j + 1)*lda + k] * tB[(i + 4)*lda + k];
+ tmpC15 += A[(j + 1)*lda + k] * tB[(i + 5)*lda + k];
+ tmpC16 += A[(j + 1)*lda + k] * tB[(i + 6)*lda + k];
+ tmpC17 += A[(j + 1)*lda + k] * tB[(i + 7)*lda + k];
+
+ tmpC00 += A[j*lda + k + 1] * tB[(i + 0)*lda + k + 1];
+ tmpC01 += A[j*lda + k + 1] * tB[(i + 1)*lda + k + 1];
+ tmpC02 += A[j*lda + k + 1] * tB[(i + 2)*lda + k + 1];
+ tmpC03 += A[j*lda + k + 1] * tB[(i + 3)*lda + k + 1];
+ tmpC04 += A[j*lda + k + 1] * tB[(i + 4)*lda + k + 1];
+ tmpC05 += A[j*lda + k + 1] * tB[(i + 5)*lda + k + 1];
+ tmpC06 += A[j*lda + k + 1] * tB[(i + 6)*lda + k + 1];
+ tmpC07 += A[j*lda + k + 1] * tB[(i + 7)*lda + k + 1];
+ tmpC10 += A[(j + 1)*lda + k + 1] * tB[(i + 0)*lda + k + 1];
+ tmpC11 += A[(j + 1)*lda + k + 1] * tB[(i + 1)*lda + k + 1];
+ tmpC12 += A[(j + 1)*lda + k + 1] * tB[(i + 2)*lda + k + 1];
+ tmpC13 += A[(j + 1)*lda + k + 1] * tB[(i + 3)*lda + k + 1];
+ tmpC14 += A[(j + 1)*lda + k + 1] * tB[(i + 4)*lda + k + 1];
+ tmpC15 += A[(j + 1)*lda + k + 1] * tB[(i + 5)*lda + k + 1];
+ tmpC16 += A[(j + 1)*lda + k + 1] * tB[(i + 6)*lda + k + 1];
+ tmpC17 += A[(j + 1)*lda + k + 1] * tB[(i + 7)*lda + k + 1];
+
+ tmpC00 += A[j*lda + k + 2] * tB[(i + 0)*lda + k + 2];
+ tmpC01 += A[j*lda + k + 2] * tB[(i + 1)*lda + k + 2];
+ tmpC02 += A[j*lda + k + 2] * tB[(i + 2)*lda + k + 2];
+ tmpC03 += A[j*lda + k + 2] * tB[(i + 3)*lda + k + 2];
+ tmpC04 += A[j*lda + k + 2] * tB[(i + 4)*lda + k + 2];
+ tmpC05 += A[j*lda + k + 2] * tB[(i + 5)*lda + k + 2];
+ tmpC06 += A[j*lda + k + 2] * tB[(i + 6)*lda + k + 2];
+ tmpC07 += A[j*lda + k + 2] * tB[(i + 7)*lda + k + 2];
+ tmpC10 += A[(j + 1)*lda + k + 2] * tB[(i + 0)*lda + k + 2];
+ tmpC11 += A[(j + 1)*lda + k + 2] * tB[(i + 1)*lda + k + 2];
+ tmpC12 += A[(j + 1)*lda + k + 2] * tB[(i + 2)*lda + k + 2];
+ tmpC13 += A[(j + 1)*lda + k + 2] * tB[(i + 3)*lda + k + 2];
+ tmpC14 += A[(j + 1)*lda + k + 2] * tB[(i + 4)*lda + k + 2];
+ tmpC15 += A[(j + 1)*lda + k + 2] * tB[(i + 5)*lda + k + 2];
+ tmpC16 += A[(j + 1)*lda + k + 2] * tB[(i + 6)*lda + k + 2];
+ tmpC17 += A[(j + 1)*lda + k + 2] * tB[(i + 7)*lda + k + 2];
+
+ tmpC00 += A[j*lda + k + 3] * tB[(i + 0)*lda + k + 3];
+ tmpC01 += A[j*lda + k + 3] * tB[(i + 1)*lda + k + 3];
+ tmpC02 += A[j*lda + k + 3] * tB[(i + 2)*lda + k + 3];
+ tmpC03 += A[j*lda + k + 3] * tB[(i + 3)*lda + k + 3];
+ tmpC04 += A[j*lda + k + 3] * tB[(i + 4)*lda + k + 3];
+ tmpC05 += A[j*lda + k + 3] * tB[(i + 5)*lda + k + 3];
+ tmpC06 += A[j*lda + k + 3] * tB[(i + 6)*lda + k + 3];
+ tmpC07 += A[j*lda + k + 3] * tB[(i + 7)*lda + k + 3];
+ tmpC10 += A[(j + 1)*lda + k + 3] * tB[(i + 0)*lda + k + 3];
+ tmpC11 += A[(j + 1)*lda + k + 3] * tB[(i + 1)*lda + k + 3];
+ tmpC12 += A[(j + 1)*lda + k + 3] * tB[(i + 2)*lda + k + 3];
+ tmpC13 += A[(j + 1)*lda + k + 3] * tB[(i + 3)*lda + k + 3];
+ tmpC14 += A[(j + 1)*lda + k + 3] * tB[(i + 4)*lda + k + 3];
+ tmpC15 += A[(j + 1)*lda + k + 3] * tB[(i + 5)*lda + k + 3];
+ tmpC16 += A[(j + 1)*lda + k + 3] * tB[(i + 6)*lda + k + 3];
+ tmpC17 += A[(j + 1)*lda + k + 3] * tB[(i + 7)*lda + k + 3];
+ }
+ C[j*lda + i + 0] = tmpC00; C[(j + 1)*lda + i + 0] = tmpC10;
+ C[j*lda + i + 1] = tmpC01; C[(j + 1)*lda + i + 1] = tmpC11;
+ C[j*lda + i + 2] = tmpC02; C[(j + 1)*lda + i + 2] = tmpC12;
+ C[j*lda + i + 3] = tmpC03; C[(j + 1)*lda + i + 3] = tmpC13;
+ C[j*lda + i + 4] = tmpC04; C[(j + 1)*lda + i + 4] = tmpC14;
+ C[j*lda + i + 5] = tmpC05; C[(j + 1)*lda + i + 5] = tmpC15;
+ C[j*lda + i + 6] = tmpC06; C[(j + 1)*lda + i + 6] = tmpC16;
+ C[j*lda + i + 7] = tmpC07; C[(j + 1)*lda + i + 7] = tmpC17;
+ barrier(ncores);
+ }
+}
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+// Multi-threaded matrix multiply: accumulates A*B into C (C += A*B).
+//
+// All three matrices are indexed as M[row*lda + col]. Output rows are dealt
+// out in bands of four: core `coreid` starts at row 4*coreid and advances by
+// 4*ncores rows per pass, so each core writes a disjoint set of rows of C.
+// Inside a band, two rows of B are consumed per step of k and each B value
+// is reused for all four output rows.
+//
+// The loops read rows j..j+3 and k..k+1 without a remainder pass, so lda is
+// expected to be a multiple of 4 (which also makes it even).
+// NOTE(review): C is accumulated into, never cleared here -- presumably the
+// caller zeroes it first; confirm against the harness.
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+    int i, j, k;
+
+    for (j = coreid*4; j < lda; j += 4*ncores) {
+        // Offsets of the four A/C rows that make up this band.
+        const int a  = j*lda;
+        const int a1 = (j + 1)*lda;
+        const int a2 = (j + 2)*lda;
+        const int a3 = (j + 3)*lda;
+
+        for (k = 0; k < lda; k += 2) {
+            // Offsets of the two B rows consumed in this step.
+            const int b  = k*lda;
+            const int b1 = (k + 1)*lda;
+
+            for (i = 0; i < lda; i++) {
+                // Hold the B operands in B's own element type. Keeping them
+                // in an int would drop any fractional part before the
+                // multiply.
+                const data_t c  = B[b + i];
+                const data_t c1 = B[b1 + i];
+
+                // Contribution of B row k to each of the four output rows.
+                C[i + a]  += A[a + k]*c;
+                C[i + a1] += A[a1 + k]*c;
+                C[i + a2] += A[a2 + k]*c;
+                C[i + a3] += A[a3 + k]*c;
+
+                // Contribution of B row k+1 to the same four output rows.
+                C[i + a]  += A[a + k + 1]*c1;
+                C[i + a1] += A[a1 + k + 1]*c1;
+                C[i + a2] += A[a2 + k + 1]*c1;
+                C[i + a3] += A[a3 + k + 1]*c1;
+            }
+        }
+    }
+}
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+// Multi-threaded matrix multiply for 32x32 matrices stored with a fixed row
+// stride of 32: C = A * B.
+//
+// Core `coreid` produces the rows of C from coreid*32/ncores up to (but not
+// including) 32*(coreid+1)/ncores, two rows per pass. Each pair of rows is
+// built up in two local 32-element accumulators and then stored to C, which
+// overwrites whatever C held before; the accumulators are cleared again for
+// the next pair.
+//
+// `lda` only bounds the walk along the shared dimension (four steps at a
+// time); every index stride is the literal 32.
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+    size_t r, k, col, n;
+    int out0, out1;
+    data_t acc0[32] = {0};
+    data_t acc1[32] = {0};
+
+    for (r = coreid*32/ncores; r < 32*(1+coreid)/ncores; r += 2) {
+        out0 = r*32;
+        out1 = (r+1)*32;
+
+        for (k = 0; k < lda; k += 4) {
+            // Four consecutive A values from each of the two rows.
+            const data_t p0 = A[out0+k];
+            const data_t p1 = A[out0+k+1];
+            const data_t p2 = A[out0+k+2];
+            const data_t p3 = A[out0+k+3];
+            const data_t q0 = A[out1+k];
+            const data_t q1 = A[out1+k+1];
+            const data_t q2 = A[out1+k+2];
+            const data_t q3 = A[out1+k+3];
+
+            // The four B rows those A values multiply against.
+            const data_t *b0 = B + k*32;
+            const data_t *b1 = B + (k+1)*32;
+            const data_t *b2 = B + (k+2)*32;
+            const data_t *b3 = B + (k+3)*32;
+
+            // Columns are processed four per iteration; the sum inside each
+            // statement is evaluated left to right.
+            for (col = 0; col < 32; col += 4) {
+                acc0[col]   += p0*b0[col]   + p1*b1[col]   + p2*b2[col]   + p3*b3[col];
+                acc0[col+1] += p0*b0[col+1] + p1*b1[col+1] + p2*b2[col+1] + p3*b3[col+1];
+                acc0[col+2] += p0*b0[col+2] + p1*b1[col+2] + p2*b2[col+2] + p3*b3[col+2];
+                acc0[col+3] += p0*b0[col+3] + p1*b1[col+3] + p2*b2[col+3] + p3*b3[col+3];
+                acc1[col]   += q0*b0[col]   + q1*b1[col]   + q2*b2[col]   + q3*b3[col];
+                acc1[col+1] += q0*b0[col+1] + q1*b1[col+1] + q2*b2[col+1] + q3*b3[col+1];
+                acc1[col+2] += q0*b0[col+2] + q1*b1[col+2] + q2*b2[col+2] + q3*b3[col+2];
+                acc1[col+3] += q0*b0[col+3] + q1*b1[col+3] + q2*b2[col+3] + q3*b3[col+3];
+            }
+        }
+
+        // Publish the finished pair of rows and reset the accumulators.
+        for (n = 0; n < 32; n++) {
+            C[out0+n] = acc0[n];
+            C[out1+n] = acc1[n];
+            acc0[n] = 0;
+            acc1[n] = 0;
+        }
+    }
+}
--- /dev/null
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+// Accumulates A*B into rows [j_begin, j_end) of C for 32x32 matrices stored
+// with a row stride of 32.
+//
+// The shared dimension is walked in two blocks of 16 (kk). For every output
+// row, eight adjacent columns are kept in scalars while one 16-wide slice of
+// the dot products is added, then written back to C.
+static void matmul_rows(const int j_begin, const int j_end, const data_t A[], const data_t B[], data_t C[])
+{
+    int i, j, k, kk;
+
+    for (kk = 0; kk < 32; kk += 16) {
+        for (j = j_begin; j < j_end; j++) {
+            for (i = 0; i < 32; i += 8) {
+                // Start from the current contents of C so the second kk
+                // block adds to what the first one stored.
+                data_t temp0 = C[i+j*32];
+                data_t temp1 = C[i+j*32+1];
+                data_t temp2 = C[i+j*32+2];
+                data_t temp3 = C[i+j*32+3];
+                data_t temp4 = C[i+j*32+4];
+                data_t temp5 = C[i+j*32+5];
+                data_t temp6 = C[i+j*32+6];
+                data_t temp7 = C[i+j*32+7];
+
+                // kk is 0 or 16, so kk+16 never exceeds 32 and no extra
+                // upper bound on k is needed.
+                for (k = kk; k < kk+16; k++) {
+                    const data_t tempA = A[j*32+k];
+                    temp0 += tempA * B[k*32 + i];
+                    temp1 += tempA * B[k*32 + i+1];
+                    temp2 += tempA * B[k*32 + i+2];
+                    temp3 += tempA * B[k*32 + i+3];
+                    temp4 += tempA * B[k*32 + i+4];
+                    temp5 += tempA * B[k*32 + i+5];
+                    temp6 += tempA * B[k*32 + i+6];
+                    temp7 += tempA * B[k*32 + i+7];
+                }
+
+                C[i+j*32]   = temp0;
+                C[i+j*32+1] = temp1;
+                C[i+j*32+2] = temp2;
+                C[i+j*32+3] = temp3;
+                C[i+j*32+4] = temp4;
+                C[i+j*32+5] = temp5;
+                C[i+j*32+6] = temp6;
+                C[i+j*32+7] = temp7;
+            }
+        }
+    }
+}
+
+// Multi-threaded matrix multiply for 32x32 matrices: C += A * B.
+//
+// The work is split into two fixed halves of 16 rows:
+//   - rows 0..15  are handled by any core with a non-zero id (after the
+//     guard below that is core 1), or by the only core when ncores == 1;
+//   - rows 16..31 are handled by core 0.
+// Cores with an id above 1 return immediately. `lda` is not consulted; all
+// sizes and strides are the literal 32.
+// NOTE(review): C is accumulated into, never cleared here -- presumably the
+// caller zeroes it first; confirm against the harness.
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+    if (coreid > 1)
+        return;
+
+    if (coreid || ncores == 1)
+        matmul_rows(0, 16, A, B, C);
+
+    if (coreid == 0)
+        matmul_rows(16, 32, A, B, C);
+}
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-#=======================================================================
-# UCB CS250 Makefile fragment for benchmarks
-#-----------------------------------------------------------------------
-#
-# Each benchmark directory should have its own fragment which
-# essentially lists what the source files are and how to link them
-# into an riscv and/or host executable. All variables should include
-# the benchmark name as a prefix so that they are unique.
-#
-
-mt_matmul_c_src = \
- mt-matmul.c \
-
-mt_matmul_riscv_src = \
- crt-mt.S \
-
-mt_matmul_c_objs = $(patsubst %.c, %.o, $(mt_matmul_c_src))
-mt_matmul_riscv_objs = $(patsubst %.S, %.o, $(mt_matmul_riscv_src))
-
-mt_matmul_host_bin = mt-matmul.host
-$(mt_matmul_host_bin) : $(mt_matmul_c_src)
- $(HOST_COMP) $^ -o $(mt_matmul_host_bin)
-
-mt_matmul_riscv_bin = mt-matmul.riscv
-$(mt_matmul_riscv_bin) : $(mt_matmul_c_objs) $(mt_matmul_riscv_objs)
- $(RISCV_LINK_MT) $(RISCV_LINK_SYSCALL) $(mt_matmul_c_objs) $(mt_matmul_riscv_objs) -o $(mt_matmul_riscv_bin)
-
-junk += $(mt_matmul_c_objs) $(mt_matmul_riscv_objs) \
- $(mt_matmul_host_bin) $(mt_matmul_riscv_bin)
+++ /dev/null
-
-#define ARRAY_SIZE 1024
-
-
-#define DIM_SIZE 32
-
-static data_t input1_data[ARRAY_SIZE] =
-{
- 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
- 1, 1, 2, 3, 1, 2, 3, 1, 1, 3, 2, 2, 0, 1, 3, 2, 2, 2, 0, 0,
- 1, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 3, 2, 1, 2, 2, 0, 0, 3, 0,
- 1, 1, 0, 3, 3, 1, 2, 3, 3, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 3,
- 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 3, 1, 1, 2, 2, 3, 3, 1,
- 3, 2, 0, 0, 0, 3, 3, 3, 2, 1, 2, 3, 1, 0, 0, 0, 0, 1, 2, 2,
- 1, 1, 3, 3, 3, 1, 1, 2, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 0, 2,
- 2, 1, 1, 0, 0, 0, 0, 0, 1, 3, 3, 1, 1, 1, 2, 2, 3, 2, 1, 1,
- 1, 1, 3, 0, 2, 2, 1, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 2, 3,
- 1, 2, 1, 3, 2, 2, 0, 1, 0, 0, 1, 2, 3, 3, 1, 0, 0, 0, 3, 1,
- 2, 3, 2, 3, 2, 0, 0, 0, 0, 0, 3, 1, 3, 0, 0, 0, 3, 1, 1, 1,
- 1, 2, 1, 2, 3, 2, 0, 0, 2, 2, 3, 0, 3, 0, 0, 3, 0, 3, 1, 3,
- 3, 1, 1, 1, 2, 2, 1, 3, 0, 3, 3, 1, 0, 0, 3, 2, 1, 3, 3, 3,
- 1, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 3, 1, 2, 2, 2, 0, 1, 3, 3,
- 3, 2, 2, 1, 0, 1, 2, 0, 1, 1, 1, 1, 2, 3, 2, 2, 3, 3, 0, 0,
- 2, 0, 0, 0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 2, 0, 2, 0, 0,
- 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 0, 2, 0, 0, 3, 3, 2, 3, 3, 0,
- 1, 0, 2, 2, 0, 3, 3, 1, 1, 0, 2, 3, 2, 1, 1, 0, 1, 2, 1, 2,
- 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 2, 3, 3, 2, 0, 0, 1, 3, 0, 3,
- 3, 0, 0, 0, 0, 3, 3, 1, 0, 0, 3, 3, 2, 1, 2, 1, 3, 3, 0, 1,
- 3, 0, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 1, 3, 0, 2, 2, 3, 1, 2,
- 2, 2, 1, 3, 3, 0, 3, 0, 0, 2, 0, 2, 3, 0, 1, 3, 2, 2, 0, 0,
- 2, 3, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 3, 1, 0, 3, 3, 2, 0, 3,
- 2, 0, 3, 0, 2, 0, 0, 2, 2, 1, 0, 2, 3, 1, 1, 1, 1, 2, 3, 3,
- 3, 0, 0, 3, 3, 3, 2, 3, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 0, 1,
- 2, 0, 2, 0, 0, 1, 3, 2, 0, 1, 3, 2, 3, 3, 0, 0, 0, 1, 0, 3,
- 3, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 3, 2, 0, 2, 1, 3, 0,
- 0, 0, 1, 3, 3, 2, 2, 2, 3, 1, 0, 0, 1, 1, 2, 1, 3, 1, 1, 2,
- 2, 3, 2, 3, 0, 2, 3, 3, 0, 3, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1,
- 2, 3, 2, 1, 1, 2, 2, 2, 3, 0, 1, 1, 2, 1, 2, 0, 2, 3, 1, 3,
- 0, 1, 1, 3, 0, 2, 3, 0, 1, 2, 3, 2, 0, 0, 3, 3, 2, 1, 1, 2,
- 3, 0, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 0, 1, 3, 2, 3, 1,
- 0, 2, 1, 2, 1, 3, 3, 1, 0, 2, 2, 3, 1, 3, 1, 3, 0, 1, 0, 3,
- 0, 3, 2, 0, 3, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 0, 1, 1, 3, 0,
- 1, 2, 1, 0, 0, 0, 3, 2, 2, 0, 0, 2, 1, 3, 0, 0, 3, 0, 0, 2,
- 1, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2, 1, 3, 2, 1, 1, 3, 0, 1, 3,
- 3, 2, 2, 1, 0, 3, 2, 2, 2, 3, 0, 1, 3, 3, 2, 3, 0, 3, 2, 3,
- 1, 1, 0, 0, 0, 2, 3, 0, 3, 0, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1,
- 3, 2, 0, 2, 1, 0, 2, 3, 2, 3, 2, 1, 2, 3, 0, 0, 1, 1, 0, 0,
- 2, 1, 0, 1, 2, 2, 2, 2, 0, 3, 3, 1, 0, 0, 0, 0, 3, 1, 1, 0,
- 0, 0, 0, 1, 2, 2, 1, 3, 0, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 3,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 1, 1, 0, 0, 0, 3, 0,
- 1, 0, 3, 1, 1, 3, 0, 1, 2, 2, 0, 0, 3, 3, 3, 3, 2, 1, 0, 0,
- 1, 0, 2, 0, 1, 1, 0, 0, 3, 3, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1,
- 1, 2, 0, 3, 1, 3, 1, 0, 3, 0, 3, 1, 1, 1, 0, 2, 0, 3, 1, 0,
- 1, 0, 2, 0, 2, 3, 3, 3, 1, 2, 3, 2, 2, 0, 1, 1, 0, 3, 3, 1,
- 3, 3, 2, 0, 2, 0, 2, 2, 3, 3, 3, 0, 2, 3, 3, 1, 3, 2, 2, 2,
- 0, 2, 3, 0, 2, 0, 3, 2, 2, 1, 1, 0, 2, 2, 2, 0, 2, 2, 0, 1,
- 3, 2, 1, 3, 2, 2, 0, 3, 3, 1, 2, 2, 0, 0, 3, 2, 1, 2, 2, 1,
- 3, 1, 2, 0, 0, 1, 1, 2, 1, 3, 2, 2, 3, 0, 2, 1, 3, 2, 1, 3,
- 2, 3, 3, 1, 2, 1, 2, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 3,
- 3, 2, 2, 1
-};
-
-static data_t input2_data[ARRAY_SIZE] =
-{
- 1, 1, 0, 3, 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 3, 3, 2,
- 2, 1, 2, 3, 3, 0, 2, 2, 1, 1, 2, 2, 0, 2, 2, 1, 2, 3, 2, 2,
- 3, 3, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 0, 0, 3, 2, 3,
- 2, 3, 1, 2, 1, 1, 2, 2, 0, 1, 0, 3, 2, 1, 1, 1, 2, 0, 1, 2,
- 2, 0, 2, 1, 3, 3, 2, 3, 2, 0, 3, 1, 3, 3, 2, 0, 1, 0, 1, 1,
- 2, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1, 3, 2, 2, 2, 3, 3, 1, 0, 2,
- 1, 0, 0, 0, 1, 1, 2, 0, 3, 2, 3, 3, 0, 2, 3, 1, 0, 0, 2, 1,
- 2, 0, 2, 1, 1, 2, 3, 1, 3, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2,
- 1, 2, 0, 3, 2, 2, 0, 0, 3, 2, 1, 1, 3, 0, 2, 0, 0, 1, 0, 2,
- 3, 3, 1, 3, 3, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 1,
- 3, 2, 2, 1, 3, 2, 0, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 0, 1, 3,
- 2, 2, 3, 1, 1, 2, 0, 2, 1, 1, 2, 3, 1, 0, 1, 0, 1, 1, 0, 0,
- 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 3, 2, 1, 0, 2, 2, 1, 1, 0, 3,
- 3, 2, 2, 0, 0, 3, 0, 1, 0, 0, 1, 2, 0, 1, 3, 0, 1, 2, 2, 0,
- 0, 3, 0, 3, 0, 1, 1, 2, 0, 0, 0, 3, 0, 0, 2, 1, 1, 1, 0, 2,
- 1, 3, 1, 2, 0, 3, 0, 3, 1, 3, 0, 0, 2, 2, 2, 2, 3, 3, 2, 1,
- 2, 2, 1, 1, 2, 2, 2, 2, 0, 3, 0, 0, 2, 0, 1, 2, 0, 3, 2, 3,
- 2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 3, 2, 2, 3, 1, 0, 3, 3, 1, 0,
- 3, 2, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 3, 1, 1, 0, 0, 2, 3, 0,
- 0, 1, 1, 1, 2, 1, 3, 2, 1, 3, 0, 1, 3, 3, 1, 1, 1, 1, 1, 1,
- 0, 0, 2, 3, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 2, 0, 1, 3, 0,
- 1, 1, 0, 1, 0, 1, 1, 3, 3, 1, 2, 2, 3, 2, 0, 2, 2, 0, 1, 3,
- 0, 1, 3, 2, 1, 3, 3, 2, 0, 1, 3, 2, 0, 2, 1, 1, 0, 3, 0, 1,
- 1, 1, 1, 1, 3, 0, 0, 1, 0, 2, 3, 1, 3, 0, 2, 1, 3, 0, 3, 0,
- 3, 2, 2, 0, 0, 2, 1, 3, 3, 2, 3, 2, 2, 1, 2, 2, 3, 0, 3, 2,
- 2, 0, 3, 2, 3, 2, 0, 0, 1, 2, 0, 0, 2, 0, 0, 3, 3, 2, 0, 0,
- 3, 3, 0, 2, 3, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 3, 0, 2,
- 2, 3, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 3, 3, 2, 1,
- 0, 0, 3, 1, 2, 3, 3, 1, 0, 3, 1, 1, 0, 3, 3, 3, 2, 2, 2, 0,
- 1, 2, 0, 3, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 2, 0, 1, 2, 2, 0,
- 2, 0, 0, 1, 0, 3, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 0,
- 2, 1, 1, 3, 2, 0, 2, 1, 1, 0, 2, 2, 1, 3, 0, 2, 1, 0, 1, 2,
- 0, 1, 3, 2, 3, 2, 1, 0, 2, 0, 2, 2, 3, 1, 1, 3, 2, 3, 2, 2,
- 0, 2, 0, 0, 0, 3, 2, 0, 2, 2, 3, 3, 3, 2, 1, 2, 0, 0, 3, 0,
- 2, 0, 3, 2, 2, 3, 0, 3, 2, 1, 2, 2, 1, 2, 0, 0, 3, 1, 2, 0,
- 2, 3, 2, 2, 1, 1, 1, 3, 3, 3, 3, 3, 1, 3, 0, 1, 3, 2, 2, 1,
- 0, 1, 1, 2, 1, 2, 3, 1, 2, 2, 1, 2, 1, 1, 0, 3, 3, 1, 1, 3,
- 2, 0, 0, 1, 2, 0, 1, 3, 1, 0, 0, 2, 2, 3, 3, 0, 2, 3, 2, 1,
- 1, 3, 0, 2, 2, 3, 3, 1, 2, 3, 3, 3, 1, 3, 0, 3, 1, 1, 2, 2,
- 2, 1, 0, 3, 2, 3, 0, 2, 3, 2, 3, 1, 2, 3, 3, 1, 2, 1, 0, 0,
- 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 1, 0, 3, 0, 3, 2, 3, 1, 0,
- 0, 1, 3, 1, 0, 2, 2, 3, 1, 0, 2, 1, 1, 3, 1, 1, 3, 1, 2, 1,
- 0, 0, 3, 2, 1, 1, 1, 1, 3, 2, 1, 3, 3, 1, 0, 3, 1, 1, 2, 0,
- 0, 0, 2, 3, 3, 2, 2, 3, 0, 2, 3, 1, 3, 3, 0, 2, 1, 2, 2, 2,
- 1, 0, 1, 3, 2, 3, 1, 1, 2, 1, 1, 0, 0, 2, 3, 2, 1, 0, 3, 1,
- 3, 0, 1, 1, 2, 2, 1, 3, 3, 1, 1, 0, 0, 3, 3, 0, 0, 0, 0, 0,
- 3, 1, 3, 0, 0, 0, 3, 3, 2, 1, 3, 0, 1, 3, 1, 1, 1, 0, 1, 0,
- 1, 2, 2, 2, 3, 3, 0, 2, 3, 2, 1, 3, 3, 1, 1, 3, 0, 3, 3, 2,
- 1, 1, 2, 0, 3, 0, 1, 2, 1, 1, 0, 0, 1, 2, 2, 0, 3, 1, 1, 1,
- 3, 3, 3, 1, 0, 3, 3, 2, 2, 2, 1, 2, 0, 1, 1, 3, 0, 3, 1, 0,
- 2, 2, 0, 1, 2, 3, 2, 1, 2, 0, 3, 2, 1, 3, 0, 1, 2, 0, 3, 0,
- 1, 1, 2, 1
-};
-
-static data_t verify_data[ARRAY_SIZE] =
-{
- 72, 75, 88, 101, 80, 88, 73, 75, 80, 81, 58, 75, 86, 65, 60, 80, 84, 83, 87, 83,
- 108, 93, 85, 76, 72, 98, 79, 86, 80, 96, 91, 85, 72, 64, 70, 83, 68, 92, 51, 54,
- 85, 85, 60, 58, 90, 64, 55, 69, 72, 48, 94, 77, 91, 83, 70, 69, 67, 77, 59, 50,
- 67, 74, 77, 67, 67, 62, 72, 71, 68, 79, 54, 61, 67, 61, 55, 62, 78, 60, 53, 64,
- 67, 69, 99, 68, 88, 60, 66, 63, 70, 62, 65, 50, 53, 66, 70, 72, 75, 78, 85, 95,
- 71, 89, 70, 68, 86, 88, 58, 77, 84, 70, 65, 68, 73, 75, 91, 96, 105, 92, 76, 68,
- 86, 69, 80, 59, 73, 83, 88, 75, 64, 63, 71, 99, 77, 77, 69, 55, 80, 73, 54, 73,
- 87, 78, 60, 69, 65, 78, 86, 89, 95, 92, 63, 69, 89, 61, 80, 65, 70, 77, 89, 77,
- 79, 79, 73, 92, 64, 81, 60, 78, 81, 80, 61, 63, 89, 65, 56, 83, 77, 65, 102, 70,
- 98, 86, 96, 68, 72, 89, 73, 73, 70, 89, 84, 76, 48, 61, 63, 70, 70, 79, 50, 53,
- 64, 63, 43, 51, 59, 62, 43, 63, 55, 77, 79, 74, 75, 74, 64, 44, 65, 69, 72, 66,
- 54, 71, 74, 72, 69, 76, 68, 89, 94, 75, 65, 53, 85, 79, 65, 74, 82, 73, 58, 70,
- 84, 77, 99, 72, 92, 84, 78, 62, 59, 83, 71, 74, 63, 85, 80, 78, 71, 72, 79, 83,
- 73, 82, 60, 85, 76, 82, 60, 70, 82, 68, 54, 85, 84, 70, 86, 74, 100, 88, 98, 68,
- 67, 87, 69, 73, 68, 88, 76, 71, 47, 43, 47, 80, 54, 65, 40, 37, 59, 53, 33, 48,
- 62, 40, 36, 55, 36, 62, 53, 57, 70, 69, 45, 43, 53, 61, 42, 57, 56, 63, 51, 47,
- 59, 75, 64, 89, 83, 75, 59, 75, 91, 92, 58, 64, 83, 74, 58, 60, 76, 66, 97, 69,
- 90, 95, 92, 64, 78, 75, 77, 73, 65, 78, 82, 75, 47, 54, 59, 71, 59, 56, 53, 42,
- 60, 55, 40, 51, 60, 46, 36, 59, 46, 57, 67, 43, 51, 53, 53, 38, 54, 56, 55, 48,
- 41, 46, 63, 63, 80, 77, 89, 102, 89, 98, 74, 86, 98, 93, 63, 76, 98, 77, 48, 101,
- 86, 88, 100, 82, 102, 90, 95, 75, 86, 103, 83, 98, 80, 104, 98, 86, 71, 74, 80, 90,
- 86, 87, 73, 70, 81, 83, 55, 66, 90, 66, 58, 84, 77, 84, 93, 72, 99, 75, 85, 65,
- 70, 89, 71, 82, 64, 79, 82, 80, 67, 73, 86, 101, 78, 97, 66, 64, 84, 80, 55, 64,
- 79, 73, 51, 79, 89, 68, 94, 77, 109, 102, 82, 61, 66, 93, 88, 70, 82, 82, 85, 69,
- 69, 72, 66, 97, 85, 90, 70, 59, 76, 89, 53, 56, 90, 79, 71, 64, 70, 67, 100, 92,
- 106, 89, 83, 78, 73, 80, 70, 72, 65, 70, 92, 88, 57, 76, 55, 85, 66, 80, 61, 63,
- 63, 78, 54, 58, 71, 73, 54, 63, 63, 62, 89, 76, 86, 81, 83, 54, 70, 81, 78, 64,
- 56, 72, 74, 81, 75, 63, 68, 89, 65, 77, 58, 68, 75, 83, 52, 62, 82, 63, 55, 75,
- 51, 70, 95, 66, 83, 77, 86, 61, 64, 77, 48, 70, 66, 82, 72, 75, 79, 71, 72, 89,
- 78, 78, 66, 59, 91, 80, 55, 64, 79, 68, 54, 71, 67, 75, 87, 84, 100, 101, 76, 58,
- 74, 82, 61, 74, 75, 97, 85, 79, 61, 55, 69, 68, 72, 65, 52, 64, 80, 73, 48, 54,
- 71, 66, 42, 61, 66, 63, 92, 64, 85, 77, 73, 54, 74, 73, 76, 66, 62, 79, 85, 70,
- 71, 84, 87, 81, 88, 86, 77, 77, 93, 88, 78, 71, 101, 89, 58, 84, 95, 81, 89, 97,
- 104, 79, 83, 76, 90, 81, 91, 74, 70, 76, 91, 80, 51, 48, 56, 69, 47, 63, 54, 42,
- 63, 63, 42, 52, 66, 56, 39, 59, 61, 52, 59, 63, 62, 68, 57, 35, 67, 58, 56, 52,
- 61, 63, 60, 47, 85, 75, 89, 106, 88, 95, 74, 82, 107, 107, 64, 78, 98, 90, 62, 91,
- 79, 87, 111, 84, 104, 106, 96, 68, 94, 99, 81, 89, 79, 105, 95, 86, 65, 63, 77, 89,
- 66, 88, 56, 73, 82, 92, 41, 62, 85, 66, 50, 81, 57, 71, 77, 78, 86, 89, 77, 53,
- 67, 78, 61, 63, 72, 82, 69, 66, 59, 46, 55, 70, 56, 64, 45, 50, 65, 64, 42, 56,
- 78, 49, 51, 52, 38, 56, 72, 55, 73, 72, 61, 50, 63, 60, 47, 57, 55, 73, 53, 68,
- 85, 88, 91, 96, 82, 89, 73, 76, 87, 86, 67, 69, 96, 84, 57, 89, 87, 89, 99, 88,
- 104, 90, 85, 75, 88, 92, 85, 75, 74, 87, 103, 94, 55, 48, 56, 65, 72, 50, 45, 51,
- 63, 62, 47, 57, 79, 53, 36, 63, 54, 68, 71, 59, 63, 61, 63, 41, 50, 73, 57, 59,
- 56, 76, 73, 65, 61, 64, 61, 79, 53, 73, 57, 44, 61, 59, 59, 56, 81, 59, 49, 62,
- 65, 55, 69, 72, 79, 70, 58, 57, 68, 61, 62, 50, 57, 60, 66, 66, 63, 77, 81, 89,
- 85, 81, 76, 73, 78, 95, 59, 70, 81, 77, 46, 79, 78, 79, 83, 81, 84, 82, 85, 48,
- 74, 85, 85, 74, 74, 80, 80, 74, 60, 76, 80, 97, 88, 93, 66, 66, 73, 84, 56, 70,
- 90, 63, 58, 78, 73, 93, 90, 78, 94, 88, 82, 67, 85, 70, 81, 86, 74, 82, 88, 82,
- 68, 73, 75, 91, 78, 97, 71, 66, 74, 85, 50, 59, 86, 77, 70, 74, 75, 74, 99, 82,
- 99, 91, 86, 65, 80, 77, 72, 69, 60, 78, 90, 87, 79, 69, 74, 98, 70, 86, 81, 67,
- 69, 78, 48, 65, 88, 70, 70, 70, 69, 72, 96, 90, 99, 82, 81, 76, 98, 73, 74, 71,
- 69, 73, 94, 89
-};
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# matmul_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the matmul benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: matmul_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3d",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3d",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-
-
-#--------------------------------------------------------------------------
-# Matmul
-#--------------------------------------------------------------------------
-
-# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
-
-sub mmult {
- my ($m1,$m2) = @_;
- my ($m1rows,$m1cols) = matdim($m1);
- my ($m2rows,$m2cols) = matdim($m2);
-
- my $result = [ ];
- my ($i, $j, $k);
-
- for $i (range($m1rows)) {
- for $j (range($m2cols)) {
- for $k (range($m1cols)) {
- $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j];
- }
- }
- }
- return $result;
-}
-
-sub range { 0 .. ($_[0] - 1) }
-
-
-sub veclen {
- my $ary_ref = $_[0];
- my $type = ref $ary_ref;
- if ($type ne "ARRAY") { die "$type is bad array ref for $ary_ref" }
- return scalar(@$ary_ref);
-}
-
-sub matdim {
- my $matrix = $_[0];
- my $rows = veclen($matrix);
- my $cols = veclen($matrix->[0]);
- return ($rows, $cols);
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- # create random input arrays
- my $mat_values1;
- my $mat_values2;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- $mat_values1->[$i][$j] = int(rand(4));
- $mat_values2->[$i][$j] = int(rand(4));
- }
- }
-
- # perform matmul
- my $mat_results = mmult( $mat_values1, $mat_values2 );
-
- # translate 2d arrays to 1d-somethings (I don't know how to code in perl - Chris)
- my @values1;
- my @values2;
- my @results;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- for ( my $j = 0; $j < $opts{"size"}; $j++ ) {
- my $value1 = $mat_values1->[$i][$j];
- my $value2 = $mat_values2->[$i][$j];
- my $result = $mat_results->[$i][$j];
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @results, $result );
- }
- }
-
- print "\n\#define ARRAY_SIZE ".($opts{"size"}*$opts{"size"})." \n\n";
- print "\n\#define DIM_SIZE ".$opts{"size"}." \n\n";
-
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@results);
-
-}
-
-main();
-
+++ /dev/null
-//**************************************************************************
-// Multi-threaded Matrix Multiply benchmark
-//--------------------------------------------------------------------------
-// TA : Christopher Celio
-// Student:
-//
-//
-// This benchmark multiplies two 2-D arrays together and writes the results to
-// a third vector. The input data (and reference data) should be generated
-// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h.
-
-
-// print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// matmul function
-
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
-
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[ARRAY_SIZE];
-
-
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
-
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results:", ARRAY_SIZE, results_data);
- printArrayMT("verify :", ARRAY_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(ARRAY_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-#=======================================================================
-# UCB CS250 Makefile fragment for benchmarks
-#-----------------------------------------------------------------------
-#
-# Each benchmark directory should have its own fragment which
-# essentially lists what the source files are and how to link them
-# into an riscv and/or host executable. All variables should include
-# the benchmark name as a prefix so that they are unique.
-#
-
-mt_vvadd_c_src = \
- mt-vvadd.c \
-
-mt_vvadd_riscv_src = \
- crt-mt.S \
-
-mt_vvadd_c_objs = $(patsubst %.c, %.o, $(mt_vvadd_c_src))
-mt_vvadd_riscv_objs = $(patsubst %.S, %.o, $(mt_vvadd_riscv_src))
-
-mt_vvadd_host_bin = mt-vvadd.host
-$(mt_vvadd_host_bin) : $(mt_vvadd_c_src)
- $(HOST_COMP) $^ -o $(mt_vvadd_host_bin)
-
-mt_vvadd_riscv_bin = mt-vvadd.riscv
-$(mt_vvadd_riscv_bin) : $(mt_vvadd_c_objs) $(mt_vvadd_riscv_objs)
- $(RISCV_LINK_MT) $(RISCV_LINK_SYSCALL) $(mt_vvadd_c_objs) $(mt_vvadd_riscv_objs) -o $(mt_vvadd_riscv_bin)
-
-junk += $(mt_vvadd_c_objs) $(mt_vvadd_riscv_objs) \
- $(mt_vvadd_host_bin) $(mt_vvadd_riscv_bin)
+++ /dev/null
-
-#define DATA_SIZE 1000
-
-static data_t input1_data[DATA_SIZE] =
-{
- 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
- 8.00, 5.00, 12.00, 14.00, 6.00, 12.00, 16.00, 7.00, 9.00, 17.00, 10.00, 10.00, 3.00, 5.00, 14.00, 11.00, 9.00, 12.00, 3.00, 1.00,
- 5.00, 4.00, 6.00, 17.00, 17.00, 4.00, 17.00, 15.00, 17.00, 18.00, 3.00, 18.00, 10.00, 6.00, 12.00, 12.00, 3.00, 2.00, 16.00, 1.00,
- 5.00, 6.00, 2.00, 17.00, 16.00, 5.00, 10.00, 18.00, 14.00, 4.00, 9.00, 9.00, 7.00, 4.00, 8.00, 13.00, 12.00, 6.00, 4.00, 17.00,
- 5.00, 2.00, 11.00, 11.00, 7.00, 6.00, 8.00, 5.00, 6.00, 6.00, 11.00, 1.00, 18.00, 7.00, 6.00, 9.00, 10.00, 17.00, 14.00, 4.00,
- 14.00, 11.00, 2.00, 0.00, 2.00, 17.00, 17.00, 15.00, 10.00, 8.00, 12.00, 18.00, 5.00, 0.00, 0.00, 1.00, 1.00, 8.00, 11.00, 11.00,
- 7.00, 5.00, 15.00, 18.00, 15.00, 6.00, 8.00, 9.00, 18.00, 6.00, 15.00, 16.00, 10.00, 18.00, 13.00, 6.00, 10.00, 16.00, 0.00, 10.00,
- 12.00, 8.00, 8.00, 3.00, 0.00, 1.00, 4.00, 0.00, 8.00, 15.00, 16.00, 9.00, 7.00, 7.00, 10.00, 9.00, 16.00, 12.00, 5.00, 5.00,
- 9.00, 5.00, 17.00, 4.00, 13.00, 13.00, 4.00, 14.00, 10.00, 5.00, 10.00, 10.00, 6.00, 18.00, 5.00, 15.00, 5.00, 15.00, 13.00, 18.00,
- 6.00, 13.00, 5.00, 18.00, 12.00, 14.00, 1.00, 7.00, 2.00, 1.00, 7.00, 12.00, 15.00, 15.00, 6.00, 2.00, 1.00, 2.00, 18.00, 6.00,
- 10.00, 14.00, 9.00, 15.00, 11.00, 0.00, 3.00, 1.00, 2.00, 2.00, 14.00, 7.00, 18.00, 0.00, 2.00, 2.00, 16.00, 8.00, 4.00, 5.00,
- 7.00, 11.00, 7.00, 10.00, 14.00, 10.00, 3.00, 0.00, 13.00, 9.00, 14.00, 0.00, 14.00, 1.00, 3.00, 16.00, 2.00, 14.00, 6.00, 17.00,
- 17.00, 6.00, 9.00, 8.00, 9.00, 12.00, 5.00, 14.00, 2.00, 16.00, 17.00, 5.00, 4.00, 0.00, 14.00, 10.00, 6.00, 15.00, 15.00, 14.00,
- 5.00, 1.00, 8.00, 8.00, 13.00, 8.00, 0.00, 4.00, 7.00, 9.00, 13.00, 16.00, 9.00, 14.00, 9.00, 13.00, 0.00, 7.00, 16.00, 17.00,
- 18.00, 10.00, 13.00, 8.00, 4.00, 9.00, 13.00, 0.00, 6.00, 4.00, 6.00, 4.00, 10.00, 14.00, 14.00, 9.00, 15.00, 15.00, 3.00, 3.00,
- 12.00, 0.00, 3.00, 2.00, 16.00, 1.00, 7.00, 2.00, 16.00, 2.00, 2.00, 0.00, 14.00, 3.00, 3.00, 10.00, 4.00, 10.00, 3.00, 4.00,
- 13.00, 14.00, 13.00, 0.00, 1.00, 15.00, 16.00, 9.00, 7.00, 9.00, 0.00, 11.00, 1.00, 1.00, 15.00, 17.00, 12.00, 16.00, 15.00, 4.00,
- 8.00, 2.00, 10.00, 10.00, 0.00, 18.00, 17.00, 7.00, 7.00, 2.00, 10.00, 17.00, 9.00, 7.00, 5.00, 3.00, 8.00, 11.00, 6.00, 9.00,
- 13.00, 0.00, 3.00, 5.00, 2.00, 5.00, 7.00, 4.00, 9.00, 2.00, 13.00, 17.00, 14.00, 12.00, 1.00, 3.00, 7.00, 17.00, 0.00, 14.00,
- 16.00, 2.00, 2.00, 1.00, 2.00, 15.00, 16.00, 8.00, 2.00, 4.00, 15.00, 15.00, 10.00, 6.00, 11.00, 9.00, 15.00, 17.00, 3.00, 8.00,
- 15.00, 3.00, 10.00, 15.00, 8.00, 14.00, 16.00, 15.00, 15.00, 14.00, 1.00, 7.00, 4.00, 18.00, 2.00, 13.00, 11.00, 15.00, 7.00, 10.00,
- 13.00, 10.00, 7.00, 14.00, 18.00, 4.00, 18.00, 4.00, 3.00, 9.00, 1.00, 13.00, 15.00, 2.00, 5.00, 15.00, 12.00, 10.00, 2.00, 0.00,
- 10.00, 15.00, 0.00, 11.00, 14.00, 11.00, 14.00, 9.00, 1.00, 18.00, 14.00, 18.00, 15.00, 5.00, 1.00, 15.00, 18.00, 14.00, 3.00, 15.00,
- 11.00, 2.00, 15.00, 0.00, 13.00, 1.00, 4.00, 14.00, 14.00, 5.00, 2.00, 13.00, 17.00, 8.00, 7.00, 6.00, 5.00, 10.00, 14.00, 14.00,
- 17.00, 0.00, 0.00, 17.00, 18.00, 15.00, 10.00, 16.00, 18.00, 5.00, 9.00, 10.00, 18.00, 7.00, 11.00, 5.00, 4.00, 16.00, 2.00, 8.00,
- 13.00, 1.00, 12.00, 3.00, 4.00, 6.00, 15.00, 12.00, 0.00, 6.00, 18.00, 12.00, 14.00, 18.00, 3.00, 2.00, 3.00, 5.00, 3.00, 14.00,
- 18.00, 12.00, 10.00, 11.00, 8.00, 4.00, 10.00, 10.00, 9.00, 18.00, 14.00, 3.00, 7.00, 17.00, 12.00, 0.00, 10.00, 9.00, 17.00, 3.00,
- 0.00, 4.00, 6.00, 16.00, 14.00, 12.00, 13.00, 13.00, 18.00, 7.00, 0.00, 1.00, 9.00, 7.00, 12.00, 6.00, 18.00, 8.00, 9.00, 13.00,
- 13.00, 17.00, 10.00, 16.00, 1.00, 10.00, 17.00, 16.00, 2.00, 18.00, 4.00, 2.00, 6.00, 1.00, 1.00, 1.00, 8.00, 14.00, 6.00, 6.00,
- 13.00, 14.00, 13.00, 6.00, 5.00, 10.00, 11.00, 11.00, 16.00, 1.00, 5.00, 9.00, 13.00, 8.00, 10.00, 2.00, 12.00, 15.00, 5.00, 14.00,
- 3.00, 7.00, 9.00, 18.00, 2.00, 11.00, 16.00, 4.00, 5.00, 10.00, 17.00, 10.00, 3.00, 4.00, 14.00, 18.00, 13.00, 6.00, 8.00, 11.00,
- 14.00, 3.00, 5.00, 6.00, 6.00, 5.00, 13.00, 0.00, 9.00, 9.00, 1.00, 7.00, 5.00, 5.00, 1.00, 6.00, 18.00, 11.00, 17.00, 7.00,
- 1.00, 10.00, 5.00, 12.00, 6.00, 16.00, 16.00, 5.00, 1.00, 10.00, 10.00, 15.00, 7.00, 18.00, 8.00, 17.00, 3.00, 5.00, 3.00, 14.00,
- 0.00, 16.00, 12.00, 0.00, 14.00, 17.00, 16.00, 2.00, 18.00, 13.00, 10.00, 13.00, 4.00, 14.00, 2.00, 3.00, 4.00, 8.00, 17.00, 0.00,
- 6.00, 11.00, 5.00, 3.00, 3.00, 2.00, 15.00, 13.00, 10.00, 4.00, 1.00, 11.00, 6.00, 17.00, 1.00, 0.00, 18.00, 3.00, 3.00, 11.00,
- 7.00, 7.00, 11.00, 14.00, 7.00, 16.00, 11.00, 10.00, 8.00, 6.00, 11.00, 5.00, 17.00, 10.00, 7.00, 8.00, 14.00, 2.00, 9.00, 17.00,
- 15.00, 13.00, 10.00, 6.00, 0.00, 15.00, 11.00, 10.00, 11.00, 18.00, 2.00, 5.00, 17.00, 18.00, 11.00, 15.00, 3.00, 17.00, 9.00, 17.00,
- 8.00, 6.00, 2.00, 4.00, 2.00, 11.00, 15.00, 2.00, 18.00, 3.00, 9.00, 7.00, 15.00, 9.00, 14.00, 10.00, 9.00, 6.00, 13.00, 8.00,
- 15.00, 14.00, 0.00, 11.00, 5.00, 2.00, 12.00, 14.00, 10.00, 16.00, 9.00, 7.00, 9.00, 17.00, 4.00, 4.00, 7.00, 8.00, 4.00, 4.00,
- 9.00, 7.00, 3.00, 5.00, 11.00, 11.00, 10.00, 13.00, 3.00, 14.00, 15.00, 8.00, 1.00, 1.00, 3.00, 0.00, 16.00, 9.00, 6.00, 1.00,
- 0.00, 2.00, 0.00, 6.00, 13.00, 12.00, 5.00, 18.00, 1.00, 11.00, 17.00, 11.00, 16.00, 14.00, 14.00, 9.00, 11.00, 9.00, 17.00, 15.00,
- 5.00, 18.00, 2.00, 11.00, 10.00, 16.00, 18.00, 5.00, 11.00, 12.00, 11.00, 18.00, 7.00, 6.00, 8.00, 3.00, 4.00, 3.00, 16.00, 4.00,
- 6.00, 2.00, 15.00, 6.00, 7.00, 16.00, 0.00, 7.00, 11.00, 10.00, 3.00, 0.00, 14.00, 16.00, 15.00, 15.00, 12.00, 7.00, 1.00, 4.00,
- 8.00, 4.00, 12.00, 0.00, 7.00, 8.00, 1.00, 1.00, 14.00, 15.00, 9.00, 8.00, 6.00, 6.00, 4.00, 7.00, 8.00, 13.00, 10.00, 5.00,
- 8.00, 11.00, 2.00, 16.00, 7.00, 17.00, 5.00, 2.00, 17.00, 0.00, 18.00, 6.00, 7.00, 4.00, 4.00, 12.00, 0.00, 18.00, 8.00, 4.00,
- 7.00, 0.00, 11.00, 1.00, 11.00, 17.00, 18.00, 15.00, 8.00, 11.00, 15.00, 9.00, 12.00, 1.00, 5.00, 6.00, 1.00, 18.00, 14.00, 7.00,
- 16.00, 16.00, 10.00, 3.00, 13.00, 0.00, 12.00, 9.00, 18.00, 14.00, 15.00, 4.00, 11.00, 15.00, 15.00, 8.00, 16.00, 11.00, 13.00, 12.00,
- 1.00, 13.00, 14.00, 2.00, 11.00, 0.00, 17.00, 11.00, 12.00, 6.00, 4.00, 4.00, 11.00, 13.00, 10.00, 2.00, 10.00, 14.00, 0.00, 6.00,
- 18.00, 10.00, 7.00, 14.00, 12.00, 9.00, 4.00, 16.00, 17.00, 8.00, 14.00, 9.00, 0.00, 4.00, 15.00, 13.00, 8.00, 13.00, 13.00, 8.00,
- 15.00, 6.00, 11.00, 4.00, 2.00, 6.00, 5.00, 14.00, 5.00, 17.00, 12.00, 11.00, 17.00, 4.00, 13.00, 7.00, 16.00, 12.00, 7.00, 18.00
-};
-
-static data_t input2_data[DATA_SIZE] =
-{
- 8.00, 6.00, 0.00, 18.00, 6.00, 10.00, 1.00, 2.00, 4.00, 2.00, 4.00, 10.00, 6.00, 11.00, 17.00, 4.00, 0.00, 16.00, 14.00, 12.00,
- 9.00, 8.00, 13.00, 15.00, 18.00, 2.00, 13.00, 10.00, 5.00, 4.00, 12.00, 9.00, 1.00, 13.00, 12.00, 7.00, 10.00, 17.00, 11.00, 10.00,
- 18.00, 15.00, 11.00, 12.00, 7.00, 9.00, 6.00, 5.00, 11.00, 7.00, 10.00, 12.00, 18.00, 18.00, 15.00, 1.00, 3.00, 18.00, 11.00, 16.00,
- 13.00, 18.00, 4.00, 13.00, 8.00, 7.00, 10.00, 13.00, 0.00, 9.00, 1.00, 16.00, 13.00, 7.00, 5.00, 5.00, 11.00, 1.00, 6.00, 10.00,
- 12.00, 3.00, 10.00, 5.00, 15.00, 15.00, 13.00, 14.00, 14.00, 1.00, 18.00, 5.00, 16.00, 14.00, 10.00, 4.00, 8.00, 4.00, 6.00, 6.00,
- 13.00, 14.00, 8.00, 5.00, 14.00, 10.00, 9.00, 10.00, 17.00, 15.00, 6.00, 16.00, 12.00, 9.00, 10.00, 16.00, 16.00, 8.00, 1.00, 12.00,
- 7.00, 0.00, 0.00, 3.00, 5.00, 7.00, 10.00, 3.00, 17.00, 10.00, 18.00, 16.00, 1.00, 11.00, 18.00, 9.00, 2.00, 0.00, 12.00, 6.00,
- 13.00, 1.00, 13.00, 5.00, 7.00, 13.00, 17.00, 9.00, 15.00, 13.00, 5.00, 2.00, 4.00, 4.00, 3.00, 0.00, 9.00, 11.00, 3.00, 12.00,
- 6.00, 11.00, 1.00, 16.00, 12.00, 11.00, 2.00, 2.00, 15.00, 12.00, 8.00, 9.00, 14.00, 2.00, 11.00, 0.00, 0.00, 7.00, 2.00, 13.00,
- 15.00, 18.00, 7.00, 16.00, 16.00, 1.00, 1.00, 12.00, 12.00, 2.00, 2.00, 1.00, 7.00, 4.00, 0.00, 8.00, 18.00, 4.00, 11.00, 6.00,
- 17.00, 13.00, 12.00, 5.00, 16.00, 12.00, 0.00, 9.00, 10.00, 10.00, 18.00, 12.00, 8.00, 7.00, 5.00, 8.00, 16.00, 3.00, 9.00, 18.00,
- 12.00, 13.00, 18.00, 6.00, 8.00, 12.00, 2.00, 12.00, 8.00, 8.00, 9.00, 18.00, 8.00, 0.00, 9.00, 2.00, 6.00, 7.00, 0.00, 3.00,
- 11.00, 2.00, 18.00, 2.00, 16.00, 1.00, 16.00, 11.00, 11.00, 16.00, 16.00, 11.00, 7.00, 4.00, 14.00, 10.00, 5.00, 8.00, 4.00, 14.00,
- 17.00, 13.00, 13.00, 3.00, 1.00, 14.00, 1.00, 7.00, 0.00, 2.00, 7.00, 14.00, 4.00, 9.00, 14.00, 3.00, 9.00, 13.00, 13.00, 3.00,
- 3.00, 17.00, 0.00, 18.00, 4.00, 8.00, 6.00, 9.00, 4.00, 2.00, 0.00, 14.00, 3.00, 3.00, 14.00, 8.00, 6.00, 7.00, 2.00, 12.00,
- 5.00, 14.00, 6.00, 12.00, 2.00, 16.00, 1.00, 15.00, 7.00, 18.00, 0.00, 0.00, 13.00, 13.00, 12.00, 11.00, 16.00, 15.00, 14.00, 8.00,
- 9.00, 10.00, 8.00, 8.00, 13.00, 13.00, 13.00, 10.00, 1.00, 15.00, 4.00, 0.00, 12.00, 1.00, 8.00, 12.00, 1.00, 18.00, 12.00, 18.00,
- 9.00, 1.00, 11.00, 5.00, 13.00, 7.00, 1.00, 13.00, 5.00, 8.00, 17.00, 11.00, 13.00, 15.00, 9.00, 3.00, 17.00, 18.00, 9.00, 3.00,
- 15.00, 11.00, 12.00, 0.00, 2.00, 15.00, 3.00, 0.00, 13.00, 1.00, 14.00, 14.00, 15.00, 8.00, 6.00, 0.00, 0.00, 11.00, 17.00, 0.00,
- 1.00, 8.00, 6.00, 6.00, 10.00, 6.00, 18.00, 12.00, 7.00, 18.00, 4.00, 6.00, 15.00, 18.00, 7.00, 5.00, 8.00, 6.00, 6.00, 7.00,
- 4.00, 4.00, 10.00, 17.00, 12.00, 13.00, 11.00, 15.00, 12.00, 18.00, 7.00, 12.00, 17.00, 10.00, 14.00, 12.00, 2.00, 7.00, 17.00, 3.00,
- 8.00, 6.00, 3.00, 9.00, 3.00, 7.00, 7.00, 15.00, 18.00, 5.00, 13.00, 13.00, 15.00, 10.00, 0.00, 11.00, 10.00, 1.00, 5.00, 16.00,
- 2.00, 7.00, 14.00, 12.00, 7.00, 17.00, 17.00, 11.00, 0.00, 5.00, 16.00, 14.00, 1.00, 9.00, 8.00, 8.00, 3.00, 17.00, 0.00, 8.00,
- 6.00, 5.00, 7.00, 6.00, 17.00, 3.00, 3.00, 8.00, 3.00, 12.00, 17.00, 5.00, 14.00, 3.00, 11.00, 5.00, 17.00, 2.00, 15.00, 1.00,
- 18.00, 11.00, 12.00, 0.00, 0.00, 14.00, 7.00, 17.00, 15.00, 10.00, 18.00, 10.00, 11.00, 7.00, 12.00, 10.00, 17.00, 2.00, 18.00, 9.00,
- 11.00, 4.00, 17.00, 10.00, 15.00, 12.00, 4.00, 1.00, 5.00, 10.00, 4.00, 2.00, 11.00, 3.00, 4.00, 15.00, 16.00, 10.00, 2.00, 2.00,
- 15.00, 16.00, 0.00, 13.00, 16.00, 9.00, 1.00, 7.00, 3.00, 10.00, 7.00, 2.00, 12.00, 8.00, 1.00, 5.00, 0.00, 16.00, 4.00, 14.00,
- 13.00, 16.00, 3.00, 0.00, 10.00, 6.00, 3.00, 9.00, 1.00, 3.00, 0.00, 13.00, 12.00, 17.00, 11.00, 4.00, 15.00, 15.00, 12.00, 4.00,
- 4.00, 2.00, 16.00, 6.00, 11.00, 17.00, 14.00, 7.00, 4.00, 14.00, 8.00, 8.00, 3.00, 18.00, 17.00, 17.00, 12.00, 10.00, 11.00, 1.00,
- 8.00, 13.00, 1.00, 14.00, 0.00, 9.00, 0.00, 7.00, 9.00, 0.00, 7.00, 12.00, 0.00, 18.00, 10.00, 1.00, 5.00, 13.00, 13.00, 2.00,
- 10.00, 4.00, 2.00, 6.00, 0.00, 16.00, 2.00, 15.00, 13.00, 6.00, 8.00, 5.00, 6.00, 15.00, 12.00, 6.00, 6.00, 7.00, 8.00, 1.00,
- 11.00, 9.00, 7.00, 18.00, 14.00, 4.00, 9.00, 6.00, 4.00, 2.00, 10.00, 13.00, 6.00, 17.00, 2.00, 11.00, 7.00, 3.00, 8.00, 9.00,
- 2.00, 6.00, 18.00, 12.00, 18.00, 13.00, 8.00, 1.00, 11.00, 4.00, 13.00, 14.00, 16.00, 8.00, 6.00, 18.00, 14.00, 15.00, 9.00, 11.00,
- 2.00, 13.00, 4.00, 3.00, 3.00, 15.00, 14.00, 3.00, 13.00, 12.00, 14.00, 16.00, 18.00, 12.00, 5.00, 11.00, 2.00, 3.00, 15.00, 1.00,
- 12.00, 1.00, 15.00, 13.00, 12.00, 18.00, 4.00, 17.00, 13.00, 7.00, 11.00, 13.00, 7.00, 10.00, 1.00, 3.00, 18.00, 6.00, 10.00, 3.00,
- 9.00, 16.00, 12.00, 10.00, 6.00, 6.00, 7.00, 16.00, 16.00, 16.00, 17.00, 18.00, 8.00, 18.00, 0.00, 6.00, 17.00, 13.00, 14.00, 8.00,
- 0.00, 7.00, 5.00, 13.00, 8.00, 12.00, 14.00, 9.00, 10.00, 10.00, 9.00, 9.00, 8.00, 6.00, 0.00, 14.00, 16.00, 8.00, 7.00, 16.00,
- 10.00, 3.00, 1.00, 9.00, 11.00, 2.00, 6.00, 18.00, 5.00, 4.00, 2.00, 11.00, 10.00, 17.00, 16.00, 2.00, 12.00, 15.00, 14.00, 6.00,
- 6.00, 17.00, 3.00, 10.00, 9.00, 18.00, 18.00, 6.00, 11.00, 16.00, 18.00, 17.00, 7.00, 14.00, 1.00, 16.00, 7.00, 9.00, 11.00, 12.00,
- 10.00, 6.00, 1.00, 16.00, 9.00, 18.00, 0.00, 9.00, 16.00, 10.00, 14.00, 6.00, 9.00, 14.00, 15.00, 7.00, 12.00, 8.00, 1.00, 1.00,
- 1.00, 17.00, 17.00, 17.00, 18.00, 1.00, 15.00, 18.00, 16.00, 16.00, 11.00, 7.00, 4.00, 15.00, 3.00, 14.00, 13.00, 14.00, 9.00, 2.00,
- 4.00, 8.00, 14.00, 7.00, 0.00, 12.00, 14.00, 15.00, 8.00, 2.00, 11.00, 5.00, 6.00, 16.00, 6.00, 4.00, 16.00, 7.00, 9.00, 5.00,
- 1.00, 0.00, 16.00, 10.00, 9.00, 6.00, 5.00, 8.00, 15.00, 13.00, 7.00, 18.00, 18.00, 8.00, 0.00, 15.00, 4.00, 6.00, 14.00, 3.00,
- 3.00, 2.00, 9.00, 14.00, 18.00, 13.00, 10.00, 15.00, 0.00, 11.00, 16.00, 7.00, 16.00, 18.00, 0.00, 12.00, 6.00, 13.00, 12.00, 12.00,
- 5.00, 3.00, 5.00, 16.00, 13.00, 16.00, 8.00, 5.00, 12.00, 8.00, 8.00, 0.00, 2.00, 9.00, 18.00, 11.00, 8.00, 4.00, 14.00, 6.00,
- 17.00, 2.00, 6.00, 8.00, 11.00, 13.00, 6.00, 18.00, 17.00, 6.00, 8.00, 0.00, 1.00, 14.00, 18.00, 4.00, 0.00, 4.00, 3.00, 3.00,
- 16.00, 8.00, 16.00, 1.00, 2.00, 0.00, 15.00, 14.00, 10.00, 9.00, 15.00, 3.00, 5.00, 18.00, 5.00, 6.00, 7.00, 3.00, 7.00, 2.00,
- 6.00, 12.00, 10.00, 10.00, 18.00, 16.00, 0.00, 9.00, 17.00, 10.00, 9.00, 14.00, 15.00, 5.00, 8.00, 15.00, 3.00, 15.00, 14.00, 11.00,
- 6.00, 6.00, 14.00, 0.00, 14.00, 0.00, 7.00, 12.00, 8.00, 7.00, 1.00, 3.00, 6.00, 10.00, 12.00, 1.00, 16.00, 5.00, 5.00, 6.00,
- 17.00, 15.00, 15.00, 9.00, 4.00, 18.00, 17.00, 13.00, 12.00, 9.00, 7.00, 10.00, 2.00, 5.00, 8.00, 18.00, 1.00, 15.00, 8.00, 0.00
-};
-
-static data_t verify_data[DATA_SIZE] =
-{
- 8.00, 21.00, 10.00, 21.00, 20.00, 16.00, 3.00, 20.00, 15.00, 17.00, 15.00, 10.00, 23.00, 27.00, 24.00, 17.00, 18.00, 18.00, 16.00, 17.00,
- 17.00, 13.00, 25.00, 29.00, 24.00, 14.00, 29.00, 17.00, 14.00, 21.00, 22.00, 19.00, 4.00, 18.00, 26.00, 18.00, 19.00, 29.00, 14.00, 11.00,
- 23.00, 19.00, 17.00, 29.00, 24.00, 13.00, 23.00, 20.00, 28.00, 25.00, 13.00, 30.00, 28.00, 24.00, 27.00, 13.00, 6.00, 20.00, 27.00, 17.00,
- 18.00, 24.00, 6.00, 30.00, 24.00, 12.00, 20.00, 31.00, 14.00, 13.00, 10.00, 25.00, 20.00, 11.00, 13.00, 18.00, 23.00, 7.00, 10.00, 27.00,
- 17.00, 5.00, 21.00, 16.00, 22.00, 21.00, 21.00, 19.00, 20.00, 7.00, 29.00, 6.00, 34.00, 21.00, 16.00, 13.00, 18.00, 21.00, 20.00, 10.00,
- 27.00, 25.00, 10.00, 5.00, 16.00, 27.00, 26.00, 25.00, 27.00, 23.00, 18.00, 34.00, 17.00, 9.00, 10.00, 17.00, 17.00, 16.00, 12.00, 23.00,
- 14.00, 5.00, 15.00, 21.00, 20.00, 13.00, 18.00, 12.00, 35.00, 16.00, 33.00, 32.00, 11.00, 29.00, 31.00, 15.00, 12.00, 16.00, 12.00, 16.00,
- 25.00, 9.00, 21.00, 8.00, 7.00, 14.00, 21.00, 9.00, 23.00, 28.00, 21.00, 11.00, 11.00, 11.00, 13.00, 9.00, 25.00, 23.00, 8.00, 17.00,
- 15.00, 16.00, 18.00, 20.00, 25.00, 24.00, 6.00, 16.00, 25.00, 17.00, 18.00, 19.00, 20.00, 20.00, 16.00, 15.00, 5.00, 22.00, 15.00, 31.00,
- 21.00, 31.00, 12.00, 34.00, 28.00, 15.00, 2.00, 19.00, 14.00, 3.00, 9.00, 13.00, 22.00, 19.00, 6.00, 10.00, 19.00, 6.00, 29.00, 12.00,
- 27.00, 27.00, 21.00, 20.00, 27.00, 12.00, 3.00, 10.00, 12.00, 12.00, 32.00, 19.00, 26.00, 7.00, 7.00, 10.00, 32.00, 11.00, 13.00, 23.00,
- 19.00, 24.00, 25.00, 16.00, 22.00, 22.00, 5.00, 12.00, 21.00, 17.00, 23.00, 18.00, 22.00, 1.00, 12.00, 18.00, 8.00, 21.00, 6.00, 20.00,
- 28.00, 8.00, 27.00, 10.00, 25.00, 13.00, 21.00, 25.00, 13.00, 32.00, 33.00, 16.00, 11.00, 4.00, 28.00, 20.00, 11.00, 23.00, 19.00, 28.00,
- 22.00, 14.00, 21.00, 11.00, 14.00, 22.00, 1.00, 11.00, 7.00, 11.00, 20.00, 30.00, 13.00, 23.00, 23.00, 16.00, 9.00, 20.00, 29.00, 20.00,
- 21.00, 27.00, 13.00, 26.00, 8.00, 17.00, 19.00, 9.00, 10.00, 6.00, 6.00, 18.00, 13.00, 17.00, 28.00, 17.00, 21.00, 22.00, 5.00, 15.00,
- 17.00, 14.00, 9.00, 14.00, 18.00, 17.00, 8.00, 17.00, 23.00, 20.00, 2.00, 0.00, 27.00, 16.00, 15.00, 21.00, 20.00, 25.00, 17.00, 12.00,
- 22.00, 24.00, 21.00, 8.00, 14.00, 28.00, 29.00, 19.00, 8.00, 24.00, 4.00, 11.00, 13.00, 2.00, 23.00, 29.00, 13.00, 34.00, 27.00, 22.00,
- 17.00, 3.00, 21.00, 15.00, 13.00, 25.00, 18.00, 20.00, 12.00, 10.00, 27.00, 28.00, 22.00, 22.00, 14.00, 6.00, 25.00, 29.00, 15.00, 12.00,
- 28.00, 11.00, 15.00, 5.00, 4.00, 20.00, 10.00, 4.00, 22.00, 3.00, 27.00, 31.00, 29.00, 20.00, 7.00, 3.00, 7.00, 28.00, 17.00, 14.00,
- 17.00, 10.00, 8.00, 7.00, 12.00, 21.00, 34.00, 20.00, 9.00, 22.00, 19.00, 21.00, 25.00, 24.00, 18.00, 14.00, 23.00, 23.00, 9.00, 15.00,
- 19.00, 7.00, 20.00, 32.00, 20.00, 27.00, 27.00, 30.00, 27.00, 32.00, 8.00, 19.00, 21.00, 28.00, 16.00, 25.00, 13.00, 22.00, 24.00, 13.00,
- 21.00, 16.00, 10.00, 23.00, 21.00, 11.00, 25.00, 19.00, 21.00, 14.00, 14.00, 26.00, 30.00, 12.00, 5.00, 26.00, 22.00, 11.00, 7.00, 16.00,
- 12.00, 22.00, 14.00, 23.00, 21.00, 28.00, 31.00, 20.00, 1.00, 23.00, 30.00, 32.00, 16.00, 14.00, 9.00, 23.00, 21.00, 31.00, 3.00, 23.00,
- 17.00, 7.00, 22.00, 6.00, 30.00, 4.00, 7.00, 22.00, 17.00, 17.00, 19.00, 18.00, 31.00, 11.00, 18.00, 11.00, 22.00, 12.00, 29.00, 15.00,
- 35.00, 11.00, 12.00, 17.00, 18.00, 29.00, 17.00, 33.00, 33.00, 15.00, 27.00, 20.00, 29.00, 14.00, 23.00, 15.00, 21.00, 18.00, 20.00, 17.00,
- 24.00, 5.00, 29.00, 13.00, 19.00, 18.00, 19.00, 13.00, 5.00, 16.00, 22.00, 14.00, 25.00, 21.00, 7.00, 17.00, 19.00, 15.00, 5.00, 16.00,
- 33.00, 28.00, 10.00, 24.00, 24.00, 13.00, 11.00, 17.00, 12.00, 28.00, 21.00, 5.00, 19.00, 25.00, 13.00, 5.00, 10.00, 25.00, 21.00, 17.00,
- 13.00, 20.00, 9.00, 16.00, 24.00, 18.00, 16.00, 22.00, 19.00, 10.00, 0.00, 14.00, 21.00, 24.00, 23.00, 10.00, 33.00, 23.00, 21.00, 17.00,
- 17.00, 19.00, 26.00, 22.00, 12.00, 27.00, 31.00, 23.00, 6.00, 32.00, 12.00, 10.00, 9.00, 19.00, 18.00, 18.00, 20.00, 24.00, 17.00, 7.00,
- 21.00, 27.00, 14.00, 20.00, 5.00, 19.00, 11.00, 18.00, 25.00, 1.00, 12.00, 21.00, 13.00, 26.00, 20.00, 3.00, 17.00, 28.00, 18.00, 16.00,
- 13.00, 11.00, 11.00, 24.00, 2.00, 27.00, 18.00, 19.00, 18.00, 16.00, 25.00, 15.00, 9.00, 19.00, 26.00, 24.00, 19.00, 13.00, 16.00, 12.00,
- 25.00, 12.00, 12.00, 24.00, 20.00, 9.00, 22.00, 6.00, 13.00, 11.00, 11.00, 20.00, 11.00, 22.00, 3.00, 17.00, 25.00, 14.00, 25.00, 16.00,
- 3.00, 16.00, 23.00, 24.00, 24.00, 29.00, 24.00, 6.00, 12.00, 14.00, 23.00, 29.00, 23.00, 26.00, 14.00, 35.00, 17.00, 20.00, 12.00, 25.00,
- 2.00, 29.00, 16.00, 3.00, 17.00, 32.00, 30.00, 5.00, 31.00, 25.00, 24.00, 29.00, 22.00, 26.00, 7.00, 14.00, 6.00, 11.00, 32.00, 1.00,
- 18.00, 12.00, 20.00, 16.00, 15.00, 20.00, 19.00, 30.00, 23.00, 11.00, 12.00, 24.00, 13.00, 27.00, 2.00, 3.00, 36.00, 9.00, 13.00, 14.00,
- 16.00, 23.00, 23.00, 24.00, 13.00, 22.00, 18.00, 26.00, 24.00, 22.00, 28.00, 23.00, 25.00, 28.00, 7.00, 14.00, 31.00, 15.00, 23.00, 25.00,
- 15.00, 20.00, 15.00, 19.00, 8.00, 27.00, 25.00, 19.00, 21.00, 28.00, 11.00, 14.00, 25.00, 24.00, 11.00, 29.00, 19.00, 25.00, 16.00, 33.00,
- 18.00, 9.00, 3.00, 13.00, 13.00, 13.00, 21.00, 20.00, 23.00, 7.00, 11.00, 18.00, 25.00, 26.00, 30.00, 12.00, 21.00, 21.00, 27.00, 14.00,
- 21.00, 31.00, 3.00, 21.00, 14.00, 20.00, 30.00, 20.00, 21.00, 32.00, 27.00, 24.00, 16.00, 31.00, 5.00, 20.00, 14.00, 17.00, 15.00, 16.00,
- 19.00, 13.00, 4.00, 21.00, 20.00, 29.00, 10.00, 22.00, 19.00, 24.00, 29.00, 14.00, 10.00, 15.00, 18.00, 7.00, 28.00, 17.00, 7.00, 2.00,
- 1.00, 19.00, 17.00, 23.00, 31.00, 13.00, 20.00, 36.00, 17.00, 27.00, 28.00, 18.00, 20.00, 29.00, 17.00, 23.00, 24.00, 23.00, 26.00, 17.00,
- 9.00, 26.00, 16.00, 18.00, 10.00, 28.00, 32.00, 20.00, 19.00, 14.00, 22.00, 23.00, 13.00, 22.00, 14.00, 7.00, 20.00, 10.00, 25.00, 9.00,
- 7.00, 2.00, 31.00, 16.00, 16.00, 22.00, 5.00, 15.00, 26.00, 23.00, 10.00, 18.00, 32.00, 24.00, 15.00, 30.00, 16.00, 13.00, 15.00, 7.00,
- 11.00, 6.00, 21.00, 14.00, 25.00, 21.00, 11.00, 16.00, 14.00, 26.00, 25.00, 15.00, 22.00, 24.00, 4.00, 19.00, 14.00, 26.00, 22.00, 17.00,
- 13.00, 14.00, 7.00, 32.00, 20.00, 33.00, 13.00, 7.00, 29.00, 8.00, 26.00, 6.00, 9.00, 13.00, 22.00, 23.00, 8.00, 22.00, 22.00, 10.00,
- 24.00, 2.00, 17.00, 9.00, 22.00, 30.00, 24.00, 33.00, 25.00, 17.00, 23.00, 9.00, 13.00, 15.00, 23.00, 10.00, 1.00, 22.00, 17.00, 10.00,
- 32.00, 24.00, 26.00, 4.00, 15.00, 0.00, 27.00, 23.00, 28.00, 23.00, 30.00, 7.00, 16.00, 33.00, 20.00, 14.00, 23.00, 14.00, 20.00, 14.00,
- 7.00, 25.00, 24.00, 12.00, 29.00, 16.00, 17.00, 20.00, 29.00, 16.00, 13.00, 18.00, 26.00, 18.00, 18.00, 17.00, 13.00, 29.00, 14.00, 17.00,
- 24.00, 16.00, 21.00, 14.00, 26.00, 9.00, 11.00, 28.00, 25.00, 15.00, 15.00, 12.00, 6.00, 14.00, 27.00, 14.00, 24.00, 18.00, 18.00, 14.00,
- 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
-};
-
+++ /dev/null
-//**************************************************************************
-// Vector-vector add benchmark
-//--------------------------------------------------------------------------
-// Author : Andrew Waterman
-// TA : Christopher Celio
-// Student :
-//
-// This benchmark adds two vectors and writes the results to a
-// third vector. The input data (and reference data) should be
-// generated using the vvadd_gendata.pl perl script and dumped
-// to a file named dataset.h
-
-// to print out arrays, etc.
-//#define DEBUG
-
-//--------------------------------------------------------------------------
-// Includes
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-
-//--------------------------------------------------------------------------
-// Input/Reference Data
-
-typedef float data_t;
-#include "dataset.h"
-
-
-//--------------------------------------------------------------------------
-// Basic Utilities and Multi-thread Support
-
-__thread unsigned long coreid;
-unsigned long ncores;
-
-#include "util.h"
-
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArrayMT( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
-//--------------------------------------------------------------------------
-// vvadd function
-
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
-
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
-}
-
-//--------------------------------------------------------------------------
-// Main
-//
-// all threads start executing thread_entry(). Use their "coreid" to
-// differentiate between threads (each thread is running on a separate core).
-
-void thread_entry(int cid, int nc)
-{
- coreid = cid;
- ncores = nc;
-
- // static allocates data in the binary, which is visible to both threads
- static data_t results_data[DATA_SIZE];
-
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
- barrier(nc);
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
-
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
- barrier(nc);
-
-
- // Execute your faster vvadd
- barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
-#ifdef DEBUG
- printArrayMT("results: ", DATA_SIZE, results_data);
- printArrayMT("verify : ", DATA_SIZE, verify_data);
-#endif
-
- // verify
- verifyMT(DATA_SIZE, results_data, verify_data);
- barrier(nc);
-
- exit(0);
-}
-
+++ /dev/null
-#!/usr/bin/perl -w
-#==========================================================================
-# vvadd_gendata.pl
-#
-# Author : Christopher Batten (cbatten@mit.edu)
-# Date : April 29, 2005
-#
-(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
-#
-# Simple script which creates an input data set and the reference data
-# for the vvadd benchmark.
-#
-ENDMSG
-
-use strict "vars";
-use warnings;
-no warnings("once");
-use Getopt::Long;
-
-#--------------------------------------------------------------------------
-# Command line processing
-#--------------------------------------------------------------------------
-
-our %opts;
-
-sub usage()
-{
-
- print "\n";
- print " Usage: vvadd_gendata.pl [options] \n";
- print "\n";
- print " Options:\n";
- print " --help print this message\n";
- print " --size size of input data [1000]\n";
- print " --seed random seed [1]\n";
- print "$usageMsg";
-
- exit();
-}
-
-sub processCommandLine()
-{
-
- $opts{"help"} = 0;
- $opts{"size"} = 1000;
- $opts{"seed"} = 1;
- Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
- $opts{"help"} and usage();
-
-}
-
-#--------------------------------------------------------------------------
-# Helper Functions
-#--------------------------------------------------------------------------
-
-sub printArray
-{
- my $arrayName = $_[0];
- my $arrayRef = $_[1];
-
- my $numCols = 20;
- my $arrayLen = scalar(@{$arrayRef});
-
- print "static data_t ".$arrayName."[DATA_SIZE] = \n";
- print "{\n";
-
- if ( $arrayLen <= $numCols ) {
- print " ";
- for ( my $i = 0; $i < $arrayLen; $i++ ) {
- print sprintf("%3.2f",$arrayRef->[$i]);
- if ( $i != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- else {
- my $numRows = int($arrayLen/$numCols);
- for ( my $j = 0; $j < $numRows; $j++ ) {
- print " ";
- for ( my $i = 0; $i < $numCols; $i++ ) {
- my $index = $j*$numCols + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- if ( $arrayLen > ($numRows*$numCols) ) {
- print " ";
- for ( my $i = 0; $i < ($arrayLen-($numRows*$numCols)); $i++ ) {
- my $index = $numCols*$numRows + $i;
- print sprintf("%3.2f",$arrayRef->[$index]);
- if ( $index != $arrayLen-1 ) {
- print ", ";
- }
- }
- print "\n";
- }
-
- }
-
- print "};\n\n";
-}
-
-#--------------------------------------------------------------------------
-# Main
-#--------------------------------------------------------------------------
-
-sub main()
-{
-
- processCommandLine();
- srand($opts{"seed"});
-
- my @values1;
- my @values2;
- my @sum;
- for ( my $i = 0; $i < $opts{"size"}; $i++ ) {
- my $value1 = int(rand(19));
- my $value2 = int(rand(19));
- push( @values1, $value1 );
- push( @values2, $value2 );
- push( @sum, $value1 + $value2 );
- }
-
-
- print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
- printArray( "input1_data", \@values1 );
- printArray( "input2_data", \@values2 );
- printArray( "verify_data", \@sum );
-
-}
-
-main();
-
--- /dev/null
+#include "stdlib.h"
+#include "dataset.h"
+
+// Adds x and y element-wise into z, splitting the work across ncores cores.
+//
+// Each core first sums one contiguous chunk of n / ncores elements. The
+// n % ncores elements that remain after the last full chunk are then handed
+// out one per core, starting with core 0.
+//
+//   coreid - index of the calling core (assumed to be in [0, ncores))
+//   ncores - number of cores sharing the work; must be non-zero
+//   n      - number of elements in x, y and z
+//   x, y   - input vectors
+//   z      - output vector
+void __attribute__((noinline)) vvadd(int coreid, int ncores, size_t n, const data_t* x, const data_t* y, data_t* z)
+{
+   size_t i;
+   size_t chunk = n / ncores;
+   // The remainder is taken modulo ncores, not modulo the chunk size:
+   // n % chunk under-counts whenever the true remainder is >= chunk (for
+   // example n = 8, ncores = 3) and divides by zero when n < ncores.
+   size_t leftover = n % ncores;
+   size_t begin = coreid * chunk;
+
+   // Contiguous chunk owned by this core.
+   for (i = begin; i < begin + chunk; i++) {
+      z[i] = x[i] + y[i];
+   }
+   // Trailing elements not covered by any chunk, interleaved across cores.
+   for (i = (n - leftover) + coreid; i < n; i += ncores) {
+      z[i] = x[i] + y[i];
+   }
+}
--- /dev/null
+#include "stdlib.h"
+#include "dataset.h"
+
+// Adds x and y element-wise into z, splitting the work across ncores cores.
+//
+// The arrays are treated as groups of four consecutive elements dealt out
+// round-robin to the cores. Each loop iteration handles two of this core's
+// groups: the one at i and the one ncores*4 elements further on.
+//
+//   coreid - index of the calling core (assumed to be in [0, ncores))
+//   ncores - number of cores sharing the work; must be non-zero
+//   n      - number of elements in x, y and z (any value, not only
+//            multiples of 8*ncores)
+//   x, y   - input vectors
+//   z      - output vector
+void __attribute__((noinline)) vvadd(int coreid, int ncores, size_t n, const data_t* x, const data_t* y, data_t* z)
+{
+   size_t i;
+   size_t k;
+   size_t half = (size_t)ncores * 4;   // distance between the two groups
+   size_t stride = 2 * half;           // elements consumed per iteration by all cores
+
+   // Fast path: both groups of four lie completely inside the arrays.
+   for (i = (size_t)coreid * 4; i + half + 4 <= n; i += stride) {
+      z[i] = x[i] + y[i];
+      z[i+1] = x[i+1] + y[i+1];
+      z[i+2] = x[i+2] + y[i+2];
+      z[i+3] = x[i+3] + y[i+3];
+      z[i+half] = x[i+half] + y[i+half];
+      z[i+half+1] = x[i+half+1] + y[i+half+1];
+      z[i+half+2] = x[i+half+2] + y[i+half+2];
+      z[i+half+3] = x[i+half+3] + y[i+half+3];
+   }
+
+   // Tail: at most one partial iteration remains, because the next start
+   // (i + stride) is already past n. Every access is bounds-checked so that
+   // nothing beyond element n-1 is read or written.
+   for (k = 0; k < 4 && i + k < n; k++) {
+      z[i+k] = x[i+k] + y[i+k];
+   }
+   for (k = 0; k < 4 && i + half + k < n; k++) {
+      z[i+half+k] = x[i+half+k] + y[i+half+k];
+   }
+}
--- /dev/null
+#include "stdlib.h"
+#include "dataset.h"
+
+// Adds x and y element-wise into z, splitting the work across ncores cores.
+//
+// Elements are interleaved across cores, and each loop iteration handles two
+// of this core's elements: i and i + ncores.
+//
+//   coreid - index of the calling core (assumed to be in [0, ncores))
+//   ncores - number of cores sharing the work; must be non-zero
+//   n      - number of elements in x, y and z
+//   x, y   - input vectors
+//   z      - output vector
+void __attribute__((noinline)) vvadd(int coreid, int ncores, size_t n, const data_t* x, const data_t* y, data_t* z)
+{
+   size_t i;
+   for (i = coreid; i < n; i += 2*ncores) {
+      size_t second = i + ncores;
+
+      z[i] = x[i] + y[i];
+      // The partner element may fall past the end on the final iteration
+      // when n is not a multiple of 2*ncores, so it has to be checked.
+      if (second < n) {
+         z[second] = x[second] + y[second];
+      }
+   }
+}
--- /dev/null
+#include "stdlib.h"
+#include "dataset.h"
+
+// Adds x and y element-wise into z, splitting the work across ncores cores.
+//
+// Each core sums one contiguous chunk of n / ncores elements using an
+// eight-way unrolled loop whose entry point is chosen by a switch (Duff's
+// device). The n % ncores elements after the last full chunk are then handed
+// out one per core, starting with core 0.
+//
+//   coreid - index of the calling core (assumed to be in [0, ncores))
+//   ncores - number of cores sharing the work; must be non-zero
+//   n      - number of elements in x, y and z
+//   x, y   - input vectors
+//   z      - output vector
+void __attribute__((noinline)) vvadd(int coreid, int ncores, size_t n, const data_t* x, const data_t* y, data_t* z)
+{
+   size_t count = n / ncores;
+   size_t leftover = n % ncores;
+   size_t i;
+   data_t* to = &z[coreid * count];
+   const data_t* from1 = &x[coreid * count];
+   const data_t* from2 = &y[coreid * count];
+
+   // With count == 0 the device would still run one full pass of eight
+   // elements and then decrement c from 0, wrapping it to SIZE_MAX, so an
+   // empty chunk must be skipped entirely.
+   if (count > 0) {
+      size_t c = (count + 7) / 8;   // number of passes through the do-while
+      // The switch jumps into the middle of the unrolled body so that the
+      // first pass handles count % 8 elements; every later pass handles 8.
+      switch(count % 8) {
+      case 0: do { *to++ = *from1++ + *from2++; /* fallthrough */
+      case 7:      *to++ = *from1++ + *from2++; /* fallthrough */
+      case 6:      *to++ = *from1++ + *from2++; /* fallthrough */
+      case 5:      *to++ = *from1++ + *from2++; /* fallthrough */
+      case 4:      *to++ = *from1++ + *from2++; /* fallthrough */
+      case 3:      *to++ = *from1++ + *from2++; /* fallthrough */
+      case 2:      *to++ = *from1++ + *from2++; /* fallthrough */
+      case 1:      *to++ = *from1++ + *from2++;
+              } while(--c > 0);
+      }
+   }
+
+   // Trailing elements not covered by any chunk, interleaved across cores.
+   for (i = (n - leftover) + coreid; i < n; i += ncores) {
+      z[i] = x[i] + y[i];
+   }
+}
--- /dev/null
+#include "stdlib.h"
+#include "dataset.h"
+
+//--------------------------------------------------------------------------
+// vvadd function
+
+// Element-wise sum z = x + y, with the elements dealt out round-robin:
+// this core handles indices coreid, coreid + ncores, coreid + 2*ncores, ...
+// for as long as the index stays below n.
+void __attribute__((noinline)) vvadd(int coreid, int ncores, size_t n, const data_t* x, const data_t* y, data_t* z)
+{
+   size_t idx = coreid;
+
+   while (idx < n)
+   {
+      const data_t sum = x[idx] + y[idx];
+      z[idx] = sum;
+      idx += ncores;   // step to this core's next element
+   }
+}