Fix TLS in benchmarks
[riscv-tests.git] / benchmarks / rsort / rsort.c
1 //**************************************************************************
2 // Radix sort benchmark
3 //--------------------------------------------------------------------------
4 //
5 // This benchmark uses radix sort to sort an array of integers. The
6 // implementation is largely adapted from Numerical Recipes for C. The
7 // input data (and reference data) should be generated using the
8 // qsort_gendata.pl perl script and dumped to a file named
9 // dataset1.h. The smips-gcc toolchain does not support system calls
10 // so printf's can only be used on a host system, not on the smips
11 // processor simulator itself. You should not change anything except
12 // the HOST_DEBUG and PREALLOCATE macros for your timing run.
13
14 #include "util.h"
15 #include <string.h>
16 #include <limits.h>
17
18 //--------------------------------------------------------------------------
19 // Input/Reference Data
20
21 #define type unsigned int
22 #include "dataset1.h"
23
// Radix parameters: sort() consumes LOG_BASE bits of each key per pass and
// keeps one counter per possible digit value, i.e. BASE counters.
24 #define LOG_BASE 8
25 #define BASE (1 << LOG_BASE)
26
27 void sort(size_t n, type* arrIn, type* scratchIn)
28 {
29 size_t log_exp = 0;
30 size_t buckets[BASE];
31 size_t *bucket = buckets;
32 asm("":"+r"(bucket));
33 type *arr = arrIn, *scratch = scratchIn, *p;
34 size_t *b;
35
36 while (log_exp < CHAR_BIT * sizeof(type))
37 {
38 for (b = bucket; b < bucket + BASE; b++)
39 *b = 0;
40
41 for (p = arr; p < &arr[n-3]; p += 4)
42 {
43 type a0 = p[0];
44 type a1 = p[1];
45 type a2 = p[2];
46 type a3 = p[3];
47 __sync_fetch_and_add(&bucket[(a0 >> log_exp) % BASE], 1);
48 __sync_fetch_and_add(&bucket[(a1 >> log_exp) % BASE], 1);
49 __sync_fetch_and_add(&bucket[(a2 >> log_exp) % BASE], 1);
50 __sync_fetch_and_add(&bucket[(a3 >> log_exp) % BASE], 1);
51 }
52 for ( ; p < &arr[n]; p++)
53 bucket[(*p >> log_exp) % BASE]++;
54
55 size_t prev = bucket[0];
56 prev += __sync_fetch_and_add(&bucket[1], prev);
57 for (b = &bucket[2]; b < bucket + BASE; b += 2)
58 {
59 prev += __sync_fetch_and_add(&b[0], prev);
60 prev += __sync_fetch_and_add(&b[1], prev);
61 }
62 static_assert(BASE % 2 == 0);
63
64 for (p = &arr[n-1]; p >= &arr[3]; p -= 4)
65 {
66 type a0 = p[-0];
67 type a1 = p[-1];
68 type a2 = p[-2];
69 type a3 = p[-3];
70 size_t* pb0 = &bucket[(a0 >> log_exp) % BASE];
71 size_t* pb1 = &bucket[(a1 >> log_exp) % BASE];
72 size_t* pb2 = &bucket[(a2 >> log_exp) % BASE];
73 size_t* pb3 = &bucket[(a3 >> log_exp) % BASE];
74 type* s0 = scratch + __sync_fetch_and_add(pb0, -1);
75 type* s1 = scratch + __sync_fetch_and_add(pb1, -1);
76 type* s2 = scratch + __sync_fetch_and_add(pb2, -1);
77 type* s3 = scratch + __sync_fetch_and_add(pb3, -1);
78 s0[-1] = a0;
79 s1[-1] = a1;
80 s2[-1] = a2;
81 s3[-1] = a3;
82 }
83 for ( ; p >= &arr[0]; p--)
84 scratch[--bucket[(*p >> log_exp) % BASE]] = *p;
85
86 type* tmp = arr;
87 arr = scratch;
88 scratch = tmp;
89
90 log_exp += LOG_BASE;
91 }
92 if (arr != arrIn)
93 memcpy(arr, scratch, n*sizeof(type));
94 }
95
96 //--------------------------------------------------------------------------
97 // Main
98
99 int main( int argc, char* argv[] )
100 {
101 static type scratch[DATA_SIZE];
102 // Output the input array
103 printArray( "input", DATA_SIZE, input_data );
104 printArray( "verify", DATA_SIZE, verify_data );
105
106 #if PREALLOCATE
107 // If needed we preallocate everything in the caches
108 sort(DATA_SIZE, verify_data, scratch);
109 if (verify(DATA_SIZE, input_data, input_data))
110 return 1;
111 #endif
112
113 // Do the sort
114 setStats(1);
115 sort(DATA_SIZE, input_data, scratch);
116 setStats(0);
117
118 // Print out the results
119 printArray( "test", DATA_SIZE, input_data );
120
121 // Check the results
122 return verify( DATA_SIZE, input_data, verify_data );
123 }