+++ /dev/null
-//**************************************************************************\r
-// Vector-vector add benchmark\r
-//--------------------------------------------------------------------------\r
-// Author : Andrew Waterman\r
-// TA : Christopher Celio\r
-// Student : \r
-//\r
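-// This benchmark adds two vectors in place: the first input vector is\r
-// copied into a results array, the second input vector is added to it,\r
-// and the results array is compared against the reference data. The input\r
-// data (and reference data) should be generated using the\r
-// vvadd_gendata.pl perl script and dumped to a file named dataset.h \r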
-\r
-// to print out arrays, etc.\r
-//#define DEBUG\r
-\r
-//--------------------------------------------------------------------------\r
-// Includes \r
-\r
-#include <string.h>\r
-#include <stdlib.h>\r
-#include <stdio.h>\r
-\r
-\r
-//--------------------------------------------------------------------------\r
-// Input/Reference Data\r
-\r
-typedef float data_t;\r
-#include "dataset.h"\r
- \r
- \r
-//--------------------------------------------------------------------------\r
-// Basic Utilities and Multi-thread Support\r
-\r
-// Index of the core running the current thread. __thread gives every thread\r
-// its own copy; it is assigned from the cid argument in thread_entry().\r
-__thread unsigned long coreid;\r
-// Number of cores taking part in the benchmark. This is one shared variable;\r
-// every thread assigns it from the nc argument in thread_entry().\r
-unsigned long ncores;\r
-\r
-#include "util.h"\r
- \r
-// Two-step stringification: stringify() expands its argument before\r
-// stringify_1() turns the result into a string literal with the # operator.\r
-#define stringify_1(s) #s\r
-#define stringify(s) stringify_1(s)\r
-// stats(code): runs `code` and reports how long it took.\r
-// _c and _i start as the negated cycle and retired-instruction counters and\r
-// have the counters added back after `code` ran, so they end up holding the\r
-// elapsed cycles and instructions. Only the thread with coreid 0 prints: the\r
-// text of `code`, the cycle count, cycles per DATA_SIZE element and cycles per\r
-// instruction, the last two with one decimal digit obtained through integer\r
-// arithmetic (10*x/y%10).\r
-// NOTE(review): rdcycle() and rdinstret() are not defined in this file;\r
-// presumably they come from util.h - confirm.\r
-#define stats(code) do { \\r
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \\r
- code; \\r
- _c += rdcycle(), _i += rdinstret(); \\r
- if (coreid == 0) \\r
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \\r
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \\r
- } while(0)\r
- \r
-\r
-//--------------------------------------------------------------------------\r
-// Helper functions\r
- \r
-// Prints a label followed by the first n elements of arr on one line, each\r
-// element converted to long. Only the thread whose coreid is 0 produces\r
-// output; all other threads return without printing.\r
-void printArrayMT( char name[], int n, data_t arr[] )\r
-{\r
-  if (coreid == 0)\r
-  {\r
-    int idx = 0;\r
-\r
-    printf( " %10s :", name );\r
-    while ( idx < n )\r
-    {\r
-      printf( " %4ld ", (long) arr[idx] );\r
-      idx++;\r
-    }\r
-    printf( "\n" );\r
-  }\r
-}\r
- \r
-// Compares the first n elements of test against correct.\r
-// Only the thread whose coreid is 0 performs the check; every other thread\r
-// returns immediately. On the first mismatch the index and both values\r
-// (converted to long) are printed and the program is terminated with\r
-// exit(-1). If all n elements match, the function simply returns.\r
-void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)\r
-{\r
-  if (coreid != 0)\r
-    return;\r
-\r
-  size_t i;\r
-  for (i = 0; i < n; i++)\r
-  {\r
-    if (test[i] != correct[i])\r
-    {\r
-      // i is a size_t but the format uses %d; convert explicitly, because\r
-      // passing a size_t where %d expects an int is undefined behavior.\r
-      printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", \r
-             (int) i, (long) test[i], (int) i, (long)correct[i]);\r
-      exit(-1);\r
-    }\r
-  }\r
-}\r
- \r
-//--------------------------------------------------------------------------\r
-// vvadd function\r
-\r
-// In-place vector add with interleaved work distribution: the calling core\r
-// updates x[i] = x[i] + y[i] for i = coreid, coreid + ncores,\r
-// coreid + 2*ncores, ... while i < n.\r
-void __attribute__((noinline)) vvadd(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)\r
-{\r
-  size_t idx = coreid;\r
-\r
-  // step by the core count so that the cores interleave their accesses\r
-  while (idx < n)\r
-  {\r
-    x[idx] += y[idx];\r
-    idx += ncores;\r
-  }\r
-}\r
-\r
-// Block-partitioned in-place vector add: x[i] = x[i] + y[i].\r
-// The calling core handles the contiguous slice\r
-//   [coreid*n/ncores, (coreid+1)*n/ncores).\r
-// Both bounds use the same formula, so one core's end is exactly the next\r
-// core's start: the slices cover 0..n-1 once each, also when n is not a\r
-// multiple of ncores. (Computing the start as coreid*(n/ncores) would make\r
-// neighbouring slices overlap in that case and add some elements twice.)\r
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)\r
-{\r
-  size_t begin = coreid * n / ncores;\r
-  size_t end = (coreid + 1) * n / ncores;\r
-  size_t i;\r
-\r
-  for (i = begin; i < end; i++)\r
-  {\r
-    x[i] = x[i] + y[i];\r
-  }\r
-}\r
-\r
-//--------------------------------------------------------------------------\r
-// Main\r
-//\r
-// all threads start executing thread_entry(). Use their "coreid" to\r
-// differentiate between threads (each thread is running on a separate core).\r
- \r
-// Benchmark driver executed by every thread.\r
-//   cid - index of the calling core, stored in the thread-local coreid\r
-//   nc  - number of cores, stored in ncores and passed to every barrier()\r
-// Runs vvadd() and then vvadd_opt() on a copy of input1_data, timing each\r
-// with stats() and checking the result against verify_data, then calls\r
-// exit(0).\r
-// NOTE(review): barrier() is not defined in this file (presumably util.h);\r
-// the comments below assume it blocks until all nc threads have reached it -\r
-// confirm.\r
-void thread_entry(int cid, int nc)\r
-{\r
- coreid = cid;\r
- ncores = nc;\r
-\r
- // static allocates data in the binary, which is visible to both threads\r
- static data_t results_data[DATA_SIZE];\r
- \r
- // because we're going to perform an in-place vvadd (and we're going to run\r
- // it a couple of times) let's copy the input data to a temporary results\r
- // array\r
- \r
- size_t i;\r
- // only core 0 fills the shared array\r
- if (coreid == 0)\r
- {\r
- for (i = 0; i < DATA_SIZE; i++)\r
- results_data[i] = input1_data[i];\r
- }\r
-\r
-\r
- // Execute the provided, terrible vvadd\r
- // the barrier before stats() keeps the other cores from starting before\r
- // core 0 has finished the copy above; the barrier inside stats() is part of\r
- // the measured code, so the measurement ends once every core is done\r
- barrier(nc);\r
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));\r
- \r
- \r
- // verify\r
- verifyMT(DATA_SIZE, results_data, verify_data);\r
- \r
- // reset results from the first trial\r
- if (coreid == 0) \r
- {\r
- for (i=0; i < DATA_SIZE; i++)\r
- results_data[i] = input1_data[i];\r
- }\r
- // the other cores wait here until core 0 has verified and reset the data\r
- barrier(nc);\r
- \r
- \r
- // Execute your faster vvadd\r
- barrier(nc);\r
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));\r
-\r
-#ifdef DEBUG\r
- printArrayMT("results: ", DATA_SIZE, results_data);\r
- printArrayMT("verify : ", DATA_SIZE, verify_data);\r
-#endif\r
- \r
- // verify\r
- verifyMT(DATA_SIZE, results_data, verify_data);\r
- // keep the other cores from reaching exit() while core 0 is still verifying\r
- barrier(nc);\r
-\r
- exit(0);\r
-}\r
-\r