+// See LICENSE for license details.
+
//**************************************************************************
// Radix sort benchmark
//--------------------------------------------------------------------------
// implementation is largely adapted from Numerical Recipes for C. The
// input data (and reference data) should be generated using the
// qsort_gendata.pl perl script and dumped to a file named
-// dataset1.h The smips-gcc toolchain does not support system calls
-// so printf's can only be used on a host system, not on the smips
-// processor simulator itself. You should not change anything except
-// the HOST_DEBUG and PREALLOCATE macros for your timing run.
+// dataset1.h.
#include "util.h"
#include <string.h>
#define LOG_BASE 8
#define BASE (1 << LOG_BASE)
+#if 0 // disabled branch: change to 1 to map fetch_add onto the GCC __sync_fetch_and_add builtin
+# define fetch_add(ptr, inc) __sync_fetch_and_add(ptr, inc)
+#else // active branch: plain compound assignment; subtracting inc again yields the value *ptr held before the add (inc is expanded twice)
+# define fetch_add(ptr, inc) ((*(ptr) += (inc)) - (inc))
+#endif
+
void sort(size_t n, type* arrIn, type* scratchIn)
{
size_t log_exp = 0;
type a1 = p[1];
type a2 = p[2];
type a3 = p[3];
- __sync_fetch_and_add(&bucket[(a0 >> log_exp) % BASE], 1);
- __sync_fetch_and_add(&bucket[(a1 >> log_exp) % BASE], 1);
- __sync_fetch_and_add(&bucket[(a2 >> log_exp) % BASE], 1);
- __sync_fetch_and_add(&bucket[(a3 >> log_exp) % BASE], 1);
+ fetch_add(&bucket[(a0 >> log_exp) % BASE], 1);
+ fetch_add(&bucket[(a1 >> log_exp) % BASE], 1);
+ fetch_add(&bucket[(a2 >> log_exp) % BASE], 1);
+ fetch_add(&bucket[(a3 >> log_exp) % BASE], 1);
}
for ( ; p < &arr[n]; p++)
bucket[(*p >> log_exp) % BASE]++;
size_t prev = bucket[0];
- prev += __sync_fetch_and_add(&bucket[1], prev);
+ prev += fetch_add(&bucket[1], prev);
for (b = &bucket[2]; b < bucket + BASE; b += 2)
{
- prev += __sync_fetch_and_add(&b[0], prev);
- prev += __sync_fetch_and_add(&b[1], prev);
+ prev += fetch_add(&b[0], prev);
+ prev += fetch_add(&b[1], prev);
}
static_assert(BASE % 2 == 0);
size_t* pb1 = &bucket[(a1 >> log_exp) % BASE];
size_t* pb2 = &bucket[(a2 >> log_exp) % BASE];
size_t* pb3 = &bucket[(a3 >> log_exp) % BASE];
- type* s0 = scratch + __sync_fetch_and_add(pb0, -1);
- type* s1 = scratch + __sync_fetch_and_add(pb1, -1);
- type* s2 = scratch + __sync_fetch_and_add(pb2, -1);
- type* s3 = scratch + __sync_fetch_and_add(pb3, -1);
+ type* s0 = scratch + fetch_add(pb0, -1);
+ type* s1 = scratch + fetch_add(pb1, -1);
+ type* s2 = scratch + fetch_add(pb2, -1);
+ type* s3 = scratch + fetch_add(pb3, -1);
s0[-1] = a0;
s1[-1] = a1;
s2[-1] = a2;
int main( int argc, char* argv[] )
{
static type scratch[DATA_SIZE];
- // Output the input array
- printArray( "input", DATA_SIZE, input_data );
- printArray( "verify", DATA_SIZE, verify_data );
#if PREALLOCATE
// If needed we preallocate everything in the caches
sort(DATA_SIZE, input_data, scratch);
setStats(0);
- // Print out the results
- printArray( "test", DATA_SIZE, input_data );
-
// Check the results
return verify( DATA_SIZE, input_data, verify_data );
}