/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
26 #include <gtest/gtest.h>
27 #include "util/bigmath.h"
28 #include "util/fast_idiv_by_const.h"
29 #include "util/u_math.h"
/* Number of random trials run by each randomized test below. */
#define RAND_TEST_ITERATIONS 100000

/* Largest value representable in `bits` bits.  The 64-bit case is special
 * so we never evaluate (1ull << 64), which would be undefined behavior.
 */
#define MAX_UINT(bits) \
   (((bits) == 64) ? UINT64_MAX : ((1ull << (bits)) - 1))
/* Truncate x to its low num_bits bits (zero-extension of the result). */
static inline uint64_t
utrunc(uint64_t x, unsigned num_bits)
{
   if (num_bits == 64)
      return x;

   const uint64_t mask = (1ull << num_bits) - 1;
   return x & mask;
}
/* Truncate x to num_bits bits and sign-extend the result back to 64 bits.
 *
 * The shift up is done in unsigned arithmetic: left-shifting a negative
 * signed value is undefined behavior in C/C++, so we must not shift the
 * int64_t directly.  The shift back down is an arithmetic (sign-extending)
 * right shift on the signed reinterpretation.
 */
static inline int64_t
strunc(int64_t x, unsigned num_bits)
{
   if (num_bits == 64)
      return x;

   uint64_t shifted = (uint64_t)x << (64 - num_bits);
   return (int64_t)shifted >> (64 - num_bits);
}
/* Returns true if x is representable as an unsigned num_bits-bit value.
 *
 * num_bits == 64 is handled up front: every uint64_t fits, and evaluating
 * (1ull << 64) would be undefined behavior (shift count equals the width).
 */
static inline bool
uint_is_in_range(uint64_t x, unsigned num_bits)
{
   if (num_bits == 64)
      return true;

   return x < (1ull << num_bits);
}
/* Returns true if x is representable as a signed two's-complement
 * num_bits-bit value, i.e. -(2^(num_bits-1)) <= x < 2^(num_bits-1).
 *
 * num_bits == 64 is handled up front: every int64_t fits, and evaluating
 * (1ll << 63) would overflow a signed 64-bit value (undefined behavior).
 */
static inline bool
sint_is_in_range(int64_t x, unsigned num_bits)
{
   if (num_bits == 64)
      return true;

   return x >= -(1ll << (num_bits - 1)) &&
          x < (1ll << (num_bits - 1));
}
73 static inline uint64_t
74 uadd_sat(uint64_t a
, uint64_t b
, unsigned num_bits
)
76 assert(uint_is_in_range(a
, num_bits
));
77 assert(uint_is_in_range(b
, num_bits
));
81 /* Standard overflow check */
82 return sum
< a
? UINT64_MAX
: sum
;
84 /* Check if sum is more than num_bits */
85 return (sum
>> num_bits
) ? MAX_UINT(num_bits
) : sum
;
/* Returns the high num_bits bits of (a * b) + c, truncated to num_bits.
 * This is the reference model for the multiply-add-high step of the fast
 * division algorithms below.
 */
static inline uint64_t
umul_add_high(uint64_t a, uint64_t b, uint64_t c, unsigned num_bits)
{
   assert(uint_is_in_range(a, num_bits));
   assert(uint_is_in_range(b, num_bits));
   assert(uint_is_in_range(c, num_bits));

   if (num_bits == 64) {
      /* The intermediate (a * b) + c needs 128 bits, so do the arithmetic
       * with the util/bigmath 32-bit-limb helpers (limbs are little-endian).
       */
      uint32_t a32[2] = { (uint32_t)a, (uint32_t)(a >> 32) };
      uint32_t b32[2] = { (uint32_t)b, (uint32_t)(b >> 32) };
      uint32_t c32[2] = { (uint32_t)c, (uint32_t)(c >> 32) };

      uint32_t ab32[4];
      ubm_mul_u32arr(ab32, a32, b32);

      uint32_t abc32[4];
      ubm_add_u32arr(abc32, ab32, c32);

      /* Limbs 2 and 3 hold the high 64 bits of the 128-bit result. */
      return abc32[2] | ((uint64_t)abc32[3] << 32);
   } else {
      /* Narrow enough that native 64-bit arithmetic cannot overflow. */
      assert(num_bits <= 32);
      return utrunc(((a * b) + c) >> num_bits, num_bits);
   }
}
/* Returns the high num_bits bits of the signed product a * b, truncated
 * (with sign-extension) to num_bits.
 */
static inline int64_t
smul_high(int64_t a, int64_t b, unsigned num_bits)
{
   assert(sint_is_in_range(a, num_bits));
   assert(sint_is_in_range(b, num_bits));

   if (num_bits == 64) {
      /* 128-bit signed multiply via unsigned 32-bit limbs: each operand is
       * widened to 128 bits by replicating its sign bit into the top limbs,
       * which makes the unsigned limb multiply produce the correct signed
       * product in its low 128 bits.
       */
      uint32_t a32[4] = {
         (uint32_t)a,
         (uint32_t)(a >> 32),
         (uint32_t)(a >> 63), /* sign extend */
         (uint32_t)(a >> 63), /* sign extend */
      };
      uint32_t b32[4] = {
         (uint32_t)b,
         (uint32_t)(b >> 32),
         (uint32_t)(b >> 63), /* sign extend */
         (uint32_t)(b >> 63), /* sign extend */
      };
      uint32_t ab32[8];
      ubm_mul_u32arr(ab32, a32, b32);

      /* Limbs 2 and 3 are bits 64..127 of the product, i.e. the high half. */
      return ab32[2] | ((uint64_t)ab32[3] << 32);
   } else {
      /* Narrow enough that a native 64-bit multiply cannot overflow. */
      assert(num_bits <= 32);
      return strunc((a * b) >> num_bits, num_bits);
   }
}
144 static inline uint64_t
145 fast_udiv_add_sat(uint64_t n
, struct util_fast_udiv_info m
, unsigned num_bits
)
147 assert(uint_is_in_range(n
, num_bits
));
148 assert(uint_is_in_range(m
.multiplier
, num_bits
));
150 n
= n
>> m
.pre_shift
;
151 n
= uadd_sat(n
, m
.increment
, num_bits
);
152 n
= umul_add_high(n
, m
.multiplier
, 0, num_bits
);
153 n
= n
>> m
.post_shift
;
158 static inline uint64_t
159 fast_udiv_mul_add(uint64_t n
, struct util_fast_udiv_info m
, unsigned num_bits
)
161 assert(uint_is_in_range(n
, num_bits
));
162 assert(uint_is_in_range(m
.multiplier
, num_bits
));
164 n
= n
>> m
.pre_shift
;
165 n
= umul_add_high(n
, m
.multiplier
,
166 m
.increment
? m
.multiplier
: 0,
168 n
= n
>> m
.post_shift
;
173 static inline uint64_t
174 fast_sdiv(int64_t n
, int64_t d
, struct util_fast_sdiv_info m
, unsigned num_bits
)
176 assert(sint_is_in_range(n
, num_bits
));
177 assert(sint_is_in_range(d
, num_bits
));
178 assert(sint_is_in_range(m
.multiplier
, num_bits
));
181 res
= smul_high(n
, m
.multiplier
, num_bits
);
182 if (d
> 0 && m
.multiplier
< 0)
183 res
= strunc(res
+ n
, num_bits
);
184 if (d
< 0 && m
.multiplier
> 0)
185 res
= strunc(res
- n
, num_bits
);
186 res
= res
>> m
.shift
;
187 res
= res
- (res
>> (num_bits
- 1));
/* Random value in [min, MAX_UINT(bits)], deliberately biased so boundary
 * values are exercised often.
 */
static uint64_t
rand_uint(unsigned bits, unsigned min)
{
   /* Make sure we get some small and large numbers and powers of two every
    * once in a while
    */
   int k = rand() % 64;
   if (k == 17) {
      /* Near the lower bound */
      return min + (rand() % 16);
   } else if (k == 42) {
      /* Near the upper bound */
      return MAX_UINT(bits) - (rand() % 16);
   } else if (k == 9) {
      /* A power of two no smaller than min */
      uint64_t r;
      do {
         r = 1ull << (rand() % bits);
      } while (r < min);
      return r;
   }

   if (min == 0) {
      /* Assemble a value from rand() pieces, then shift right by a random
       * amount so every magnitude within `bits` is exercised.
       * NOTE(review): the 0xf mask keeps only 4 bits of each rand() call,
       * leaving bits 4-7 of every byte always clear — verify against
       * upstream whether 0xff was intended here.
       */
      uint64_t r = 0;
      for (unsigned i = 0; i < 8; i++)
         r |= ((uint64_t)rand() & 0xf) << i * 8;
      return r >> (63 - (rand() % bits));
   } else {
      /* Rejection-sample until the minimum is met */
      uint64_t r;
      do {
         r = rand_uint(bits, 0);
      } while (r < min);
      return r;
   }
}
229 rand_sint(unsigned bits
, unsigned min_abs
)
231 /* Make sure we hit MIN_INT every once in a while */
232 if (rand() % 64 == 37)
233 return -(1 << (bits
- 1));
235 int64_t s
= rand_uint(bits
- 1, min_abs
);
236 return rand() & 1 ? s
: -s
;
/* Reference unsigned division: truncate both operands to bit_size bits and
 * divide.  Used as ground truth for the fast-division checks.
 */
static uint64_t
udiv(uint64_t a, uint64_t b, unsigned bit_size)
{
   switch (bit_size) {
   case 8:  return (uint8_t)a  / (uint8_t)b;
   case 16: return (uint16_t)a / (uint16_t)b;
   case 32: return (uint32_t)a / (uint32_t)b;
   case 64: return a / b;
   default:
      assert(!"Invalid bit size");
      return 0;
   }
}
/* Reference signed division: truncate both operands to bit_size bits
 * (sign-extending) and divide with C's round-toward-zero semantics.
 */
static int64_t
sdiv(int64_t a, int64_t b, unsigned bit_size)
{
   switch (bit_size) {
   case 8:  return (int8_t)a  / (int8_t)b;
   case 16: return (int16_t)a / (int16_t)b;
   case 32: return (int32_t)a / (int32_t)b;
   case 64: return a / b;
   default:
      assert(!"Invalid bit size");
      return 0;
   }
}
268 random_udiv_add_sat_test(unsigned bits
, bool bounded
)
270 for (unsigned i
= 0; i
< RAND_TEST_ITERATIONS
; i
++) {
271 uint64_t n
= rand_uint(bits
, 0);
272 uint64_t d
= rand_uint(bits
, 2);
273 assert(uint_is_in_range(n
, bits
));
274 assert(uint_is_in_range(d
, bits
) && d
>= 2);
276 unsigned n_bits
= bounded
? util_logbase2_64(MAX2(n
, 1)) + 1 : bits
;
278 struct util_fast_udiv_info m
=
279 util_compute_fast_udiv_info(d
, n_bits
, bits
);
280 EXPECT_EQ(fast_udiv_add_sat(n
, m
, bits
), udiv(n
, d
, bits
));
285 random_udiv_mul_add_test(unsigned bits
, bool bounded
)
287 for (unsigned i
= 0; i
< RAND_TEST_ITERATIONS
; i
++) {
288 uint64_t n
= rand_uint(bits
, 0);
289 uint64_t d
= rand_uint(bits
, 1);
290 assert(uint_is_in_range(n
, bits
));
291 assert(uint_is_in_range(d
, bits
) && d
>= 1);
293 unsigned n_bits
= bounded
? util_logbase2_64(MAX2(n
, 1)) + 1: bits
;
295 struct util_fast_udiv_info m
=
296 util_compute_fast_udiv_info(d
, n_bits
, bits
);
297 EXPECT_EQ(fast_udiv_mul_add(n
, m
, bits
), udiv(n
, d
, bits
));
302 random_sdiv_test(unsigned bits
)
304 for (unsigned i
= 0; i
< RAND_TEST_ITERATIONS
; i
++) {
305 int64_t n
= rand_sint(bits
, 0);
308 d
= rand_sint(bits
, 2);
309 } while (util_is_power_of_two_or_zero64(llabs(d
)));
311 assert(sint_is_in_range(n
, bits
));
312 assert(sint_is_in_range(d
, bits
) && llabs(d
) >= 2);
314 struct util_fast_sdiv_info m
=
315 util_compute_fast_sdiv_info(d
, bits
);
316 EXPECT_EQ(fast_sdiv(n
, d
, m
, bits
), sdiv(n
, d
, bits
));
/* Exhaustive check of the add_sat path at 8 bits: every divisor, every
 * numerator width, every numerator.
 */
TEST(fast_idiv_by_const, uint8_add_sat)
{
   /* 8-bit is small enough we can brute-force the entire space */
   for (unsigned d = 2; d < 256; d++) {
      for (unsigned n_bits = 1; n_bits <= 8; n_bits++) {
         struct util_fast_udiv_info m =
            util_compute_fast_udiv_info(d, n_bits, 8);

         for (unsigned n = 0; n < (1u << n_bits); n++)
            EXPECT_EQ(fast_udiv_add_sat(n, m, 8), udiv(n, d, 8));
      }
   }
}
/* Exhaustive check of the mul_add path at 8 bits: every divisor, every
 * numerator width, every numerator.
 */
TEST(fast_idiv_by_const, uint8_mul_add)
{
   /* 8-bit is small enough we can brute-force the entire space */
   for (unsigned d = 2; d < 256; d++) {
      for (unsigned n_bits = 1; n_bits <= 8; n_bits++) {
         struct util_fast_udiv_info m =
            util_compute_fast_udiv_info(d, n_bits, 8);

         for (unsigned n = 0; n < (1u << n_bits); n++)
            EXPECT_EQ(fast_udiv_mul_add(n, m, 8), udiv(n, d, 8));
      }
   }
}
/* Exhaustive check of the signed path at 8 bits.  Power-of-two (and zero)
 * divisors are skipped: they take a different path in the real optimization.
 */
TEST(fast_idiv_by_const, int8)
{
   /* 8-bit is small enough we can brute-force the entire space */
   for (int n = -128; n < 128; n++) {
      for (int d = -128; d < 128; d++) {
         if (util_is_power_of_two_or_zero(abs(d)))
            continue;

         struct util_fast_sdiv_info m =
            util_compute_fast_sdiv_info(d, 8);
         EXPECT_EQ(fast_sdiv(n, d, m, 8), sdiv(n, d, 8));
      }
   }
}
/* Randomized coverage at 16-bit width. */
TEST(fast_idiv_by_const, uint16_add_sat_bounded)
{
   random_udiv_add_sat_test(16, true);
}

TEST(fast_idiv_by_const, uint16_add_sat_full)
{
   random_udiv_add_sat_test(16, false);
}

TEST(fast_idiv_by_const, uint16_mul_add_bounded)
{
   random_udiv_mul_add_test(16, true);
}

TEST(fast_idiv_by_const, uint16_mul_add_full)
{
   random_udiv_mul_add_test(16, false);
}

TEST(fast_idiv_by_const, int16)
{
   random_sdiv_test(16);
}

/* Randomized coverage at 32-bit width. */
TEST(fast_idiv_by_const, uint32_add_sat_bounded)
{
   random_udiv_add_sat_test(32, true);
}

TEST(fast_idiv_by_const, uint32_add_sat_full)
{
   random_udiv_add_sat_test(32, false);
}

TEST(fast_idiv_by_const, uint32_mul_add_bounded)
{
   random_udiv_mul_add_test(32, true);
}

TEST(fast_idiv_by_const, uint32_mul_add_full)
{
   random_udiv_mul_add_test(32, false);
}

TEST(fast_idiv_by_const, int32)
{
   random_sdiv_test(32);
}
/* Randomized check of the shipped util_fast_udiv32() helper against plain
 * 32-bit division.
 */
TEST(fast_idiv_by_const, util_fast_udiv32)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      uint32_t n = rand_uint(32, 0);
      uint32_t d = rand_uint(32, 1);

      struct util_fast_udiv_info m =
         util_compute_fast_udiv_info(d, 32, 32);
      EXPECT_EQ(util_fast_udiv32(n, m), n / d);
   }
}
/* Randomized check of util_fast_udiv32_nuw() ("no unsigned wrap" variant). */
TEST(fast_idiv_by_const, util_fast_udiv32_nuw)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      uint32_t n = rand_uint(32, 0);
      /* NOTE(review): the nuw variant is presumably not defined for
       * n == UINT32_MAX (its internal increment would wrap), so the
       * numerator is kept below that — TODO confirm against upstream.
       */
      n = MIN2(n, UINT32_MAX - 1);
      uint32_t d = rand_uint(32, 1);

      struct util_fast_udiv_info m =
         util_compute_fast_udiv_info(d, 32, 32);
      EXPECT_EQ(util_fast_udiv32_nuw(n, m), n / d);
   }
}
/* Randomized check of util_fast_udiv32_u31_d_not_one(): numerators are
 * restricted to 31 bits and divisors to d >= 2, matching that helper's
 * documented preconditions.
 */
TEST(fast_idiv_by_const, util_fast_udiv32_u31_d_not_one)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      uint32_t n = rand_uint(31, 0);
      uint32_t d = rand_uint(31, 2);

      struct util_fast_udiv_info m =
         util_compute_fast_udiv_info(d, 31, 32);
      EXPECT_EQ(util_fast_udiv32_u31_d_not_one(n, m), n / d);
   }
}
/* Randomized coverage at 64-bit width. */
TEST(fast_idiv_by_const, uint64_add_sat_bounded)
{
   random_udiv_add_sat_test(64, true);
}

TEST(fast_idiv_by_const, uint64_add_sat_full)
{
   random_udiv_add_sat_test(64, false);
}

TEST(fast_idiv_by_const, uint64_mul_add_bounded)
{
   random_udiv_mul_add_test(64, true);
}

TEST(fast_idiv_by_const, uint64_mul_add_full)
{
   random_udiv_mul_add_test(64, false);
}

TEST(fast_idiv_by_const, int64)
{
   random_sdiv_test(64);
}