/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
26 #include <gtest/gtest.h>
27 #include "util/bigmath.h"
28 #include "util/fast_idiv_by_const.h"
29 #include "util/u_math.h"
/* Number of random trials run by each randomized test below. */
#define RAND_TEST_ITERATIONS 100000

/* Largest value representable in `bits` bits.  The 64-bit case is special
 * so we never evaluate (1ull << 64), which would be undefined behavior.
 */
#define MAX_UINT(bits) \
   (((bits) == 64) ? UINT64_MAX : ((1ull << (bits)) - 1))
/* Truncate x to its low num_bits bits (zero-extension of the result). */
static inline uint64_t
utrunc(uint64_t x, unsigned num_bits)
{
   if (num_bits == 64)
      return x;

   const uint64_t mask = (1ull << num_bits) - 1;
   return x & mask;
}
/* Truncate x to num_bits bits and sign-extend the result back to 64 bits.
 *
 * The shift up is done in unsigned arithmetic: left-shifting a negative
 * signed value is undefined behavior in C/C++, so we must not shift the
 * int64_t directly.  The shift back down is an arithmetic (sign-extending)
 * right shift on the signed reinterpretation.
 */
static inline int64_t
strunc(int64_t x, unsigned num_bits)
{
   if (num_bits == 64)
      return x;

   uint64_t shifted = (uint64_t)x << (64 - num_bits);
   return (int64_t)shifted >> (64 - num_bits);
}
/* Returns true if x is representable as an unsigned num_bits-bit value.
 *
 * num_bits == 64 is handled up front: every uint64_t fits, and evaluating
 * (1ull << 64) would be undefined behavior (shift count equals the width).
 */
static inline bool
uint_is_in_range(uint64_t x, unsigned num_bits)
{
   if (num_bits == 64)
      return true;

   return x < (1ull << num_bits);
}
/* Returns true if x is representable as a signed two's-complement
 * num_bits-bit value, i.e. -(2^(num_bits-1)) <= x < 2^(num_bits-1).
 *
 * num_bits == 64 is handled up front: every int64_t fits, and evaluating
 * (1ll << 63) would overflow a signed 64-bit value (undefined behavior).
 */
static inline bool
sint_is_in_range(int64_t x, unsigned num_bits)
{
   if (num_bits == 64)
      return true;

   return x >= -(1ll << (num_bits - 1)) &&
          x < (1ll << (num_bits - 1));
}
73 static inline uint64_t
74 uadd_sat(uint64_t a
, uint64_t b
, unsigned num_bits
)
76 assert(uint_is_in_range(a
, num_bits
));
77 assert(uint_is_in_range(b
, num_bits
));
81 /* Standard overflow check */
82 return sum
< a
? UINT64_MAX
: sum
;
84 /* Check if sum is more than num_bits */
85 return (sum
>> num_bits
) ? MAX_UINT(num_bits
) : sum
;
/* Returns the high num_bits bits of (a * b) + c, truncated to num_bits.
 * This is the reference model for the multiply-add-high step of the fast
 * division algorithms below.
 */
static inline uint64_t
umul_add_high(uint64_t a, uint64_t b, uint64_t c, unsigned num_bits)
{
   assert(uint_is_in_range(a, num_bits));
   assert(uint_is_in_range(b, num_bits));
   assert(uint_is_in_range(c, num_bits));

   if (num_bits == 64) {
      /* The intermediate (a * b) + c needs 128 bits, so do the arithmetic
       * with the util/bigmath 32-bit-limb helpers (limbs are little-endian).
       */
      uint32_t a32[2] = { (uint32_t)a, (uint32_t)(a >> 32) };
      uint32_t b32[2] = { (uint32_t)b, (uint32_t)(b >> 32) };
      uint32_t c32[2] = { (uint32_t)c, (uint32_t)(c >> 32) };

      uint32_t ab32[4];
      ubm_mul_u32arr(ab32, a32, b32);

      uint32_t abc32[4];
      ubm_add_u32arr(abc32, ab32, c32);

      /* Limbs 2 and 3 hold the high 64 bits of the 128-bit result. */
      return abc32[2] | ((uint64_t)abc32[3] << 32);
   } else {
      /* Narrow enough that native 64-bit arithmetic cannot overflow. */
      assert(num_bits <= 32);
      return utrunc(((a * b) + c) >> num_bits, num_bits);
   }
}
/* Returns the high num_bits bits of the signed product a * b, truncated
 * (with sign-extension) to num_bits.
 */
static inline int64_t
smul_high(int64_t a, int64_t b, unsigned num_bits)
{
   assert(sint_is_in_range(a, num_bits));
   assert(sint_is_in_range(b, num_bits));

   if (num_bits == 64) {
      /* 128-bit signed multiply via unsigned 32-bit limbs: each operand is
       * widened to 128 bits by replicating its sign bit into the top limbs,
       * which makes the unsigned limb multiply produce the correct signed
       * product in its low 128 bits.
       */
      uint32_t a32[4] = {
         (uint32_t)a,
         (uint32_t)(a >> 32),
         (uint32_t)(a >> 63), /* sign extend */
         (uint32_t)(a >> 63), /* sign extend */
      };
      uint32_t b32[4] = {
         (uint32_t)b,
         (uint32_t)(b >> 32),
         (uint32_t)(b >> 63), /* sign extend */
         (uint32_t)(b >> 63), /* sign extend */
      };
      uint32_t ab32[8];
      ubm_mul_u32arr(ab32, a32, b32);

      /* Limbs 2 and 3 are bits 64..127 of the product, i.e. the high half. */
      return ab32[2] | ((uint64_t)ab32[3] << 32);
   } else {
      /* Narrow enough that a native 64-bit multiply cannot overflow. */
      assert(num_bits <= 32);
      return strunc((a * b) >> num_bits, num_bits);
   }
}
144 static inline uint64_t
145 fast_udiv_add_sat(uint64_t n
, struct util_fast_udiv_info m
, unsigned num_bits
)
147 assert(uint_is_in_range(n
, num_bits
));
148 assert(uint_is_in_range(m
.multiplier
, num_bits
));
150 n
= n
>> m
.pre_shift
;
151 n
= uadd_sat(n
, m
.increment
, num_bits
);
152 n
= umul_add_high(n
, m
.multiplier
, 0, num_bits
);
153 n
= n
>> m
.post_shift
;
158 static inline uint64_t
159 fast_udiv_mul_add(uint64_t n
, struct util_fast_udiv_info m
, unsigned num_bits
)
161 assert(uint_is_in_range(n
, num_bits
));
162 assert(uint_is_in_range(m
.multiplier
, num_bits
));
164 n
= n
>> m
.pre_shift
;
165 n
= umul_add_high(n
, m
.multiplier
,
166 m
.increment
? m
.multiplier
: 0,
168 n
= n
>> m
.post_shift
;
173 static inline uint64_t
174 fast_sdiv(int64_t n
, int64_t d
, struct util_fast_sdiv_info m
, unsigned num_bits
)
176 assert(sint_is_in_range(n
, num_bits
));
177 assert(sint_is_in_range(d
, num_bits
));
178 assert(sint_is_in_range(m
.multiplier
, num_bits
));
181 res
= smul_high(n
, m
.multiplier
, num_bits
);
182 if (d
> 0 && m
.multiplier
< 0)
183 res
= strunc(res
+ n
, num_bits
);
184 if (d
< 0 && m
.multiplier
> 0)
185 res
= strunc(res
- n
, num_bits
);
186 res
= res
>> m
.shift
;
187 res
= res
- (res
>> (num_bits
- 1));
/* Random value in [min, MAX_UINT(bits)], deliberately biased so boundary
 * values are exercised often.
 */
static uint64_t
rand_uint(unsigned bits, unsigned min)
{
   /* Make sure we get some small and large numbers and powers of two every
    * once in a while
    */
   int k = rand() % 64;
   if (k == 17) {
      /* Near the lower bound */
      return min + (rand() % 16);
   } else if (k == 42) {
      /* Near the upper bound */
      return MAX_UINT(bits) - (rand() % 16);
   } else if (k == 9) {
      /* A power of two no smaller than min */
      uint64_t r;
      do {
         r = 1ull << (rand() % bits);
      } while (r < min);
      return r;
   }

   if (min == 0) {
      /* Assemble a value from rand() pieces, then shift right by a random
       * amount so every magnitude within `bits` is exercised.
       * NOTE(review): the 0xf mask keeps only 4 bits of each rand() call,
       * leaving bits 4-7 of every byte always clear — verify against
       * upstream whether 0xff was intended here.
       */
      uint64_t r = 0;
      for (unsigned i = 0; i < 8; i++)
         r |= ((uint64_t)rand() & 0xf) << i * 8;
      return r >> (63 - (rand() % bits));
   } else {
      /* Rejection-sample until the minimum is met */
      uint64_t r;
      do {
         r = rand_uint(bits, 0);
      } while (r < min);
      return r;
   }
}
229 rand_sint(unsigned bits
, unsigned min_abs
)
231 /* Make sure we hit MIN_INT every once in a while */
232 if (rand() % 64 == 37)
233 return -(1 << (bits
- 1));
235 int64_t s
= rand_uint(bits
- 1, min_abs
);
236 return rand() & 1 ? s
: -s
;
/* Reference unsigned division: truncate both operands to bit_size bits and
 * divide.  Used as ground truth for the fast-division checks.
 */
static uint64_t
udiv(uint64_t a, uint64_t b, unsigned bit_size)
{
   switch (bit_size) {
   case 8:  return (uint8_t)a  / (uint8_t)b;
   case 16: return (uint16_t)a / (uint16_t)b;
   case 32: return (uint32_t)a / (uint32_t)b;
   case 64: return a / b;
   default:
      assert(!"Invalid bit size");
      return 0;
   }
}
/* Reference signed division: truncate both operands to bit_size bits
 * (sign-extending) and divide with C's round-toward-zero semantics.
 */
static int64_t
sdiv(int64_t a, int64_t b, unsigned bit_size)
{
   switch (bit_size) {
   case 8:  return (int8_t)a  / (int8_t)b;
   case 16: return (int16_t)a / (int16_t)b;
   case 32: return (int32_t)a / (int32_t)b;
   case 64: return a / b;
   default:
      assert(!"Invalid bit size");
      return 0;
   }
}
268 random_udiv_add_sat_test(unsigned bits
, bool bounded
)
270 for (unsigned i
= 0; i
< RAND_TEST_ITERATIONS
; i
++) {
271 uint64_t n
= rand_uint(bits
, 0);
272 uint64_t d
= rand_uint(bits
, 2);
273 assert(uint_is_in_range(n
, bits
));
274 assert(uint_is_in_range(d
, bits
) && d
>= 2);
276 unsigned n_bits
= bounded
? util_logbase2_64(MAX2(n
, 1)) + 1 : bits
;
278 struct util_fast_udiv_info m
=
279 util_compute_fast_udiv_info(d
, n_bits
, bits
);
280 EXPECT_EQ(fast_udiv_add_sat(n
, m
, bits
), udiv(n
, d
, bits
));
285 random_udiv_mul_add_test(unsigned bits
, bool bounded
)
287 for (unsigned i
= 0; i
< RAND_TEST_ITERATIONS
; i
++) {
288 uint64_t n
= rand_uint(bits
, 0);
289 uint64_t d
= rand_uint(bits
, 1);
290 assert(uint_is_in_range(n
, bits
));
291 assert(uint_is_in_range(d
, bits
) && d
>= 1);
293 unsigned n_bits
= bounded
? util_logbase2_64(MAX2(n
, 1)) + 1: bits
;
295 struct util_fast_udiv_info m
=
296 util_compute_fast_udiv_info(d
, n_bits
, bits
);
297 EXPECT_EQ(fast_udiv_mul_add(n
, m
, bits
), udiv(n
, d
, bits
));
302 random_sdiv_test(unsigned bits
)
304 for (unsigned i
= 0; i
< RAND_TEST_ITERATIONS
; i
++) {
305 int64_t n
= rand_sint(bits
, 0);
308 d
= rand_sint(bits
, 2);
309 } while (util_is_power_of_two_or_zero64(llabs(d
)));
311 assert(sint_is_in_range(n
, bits
));
312 assert(sint_is_in_range(d
, bits
) && llabs(d
) >= 2);
314 struct util_fast_sdiv_info m
=
315 util_compute_fast_sdiv_info(d
, bits
);
316 EXPECT_EQ(fast_sdiv(n
, d
, m
, bits
), sdiv(n
, d
, bits
));
/* Exhaustive check of the add_sat path at 8 bits: every divisor, every
 * numerator width, every numerator.
 */
TEST(fast_idiv_by_const, uint8_add_sat)
{
   /* 8-bit is small enough we can brute-force the entire space */
   for (unsigned d = 2; d < 256; d++) {
      for (unsigned n_bits = 1; n_bits <= 8; n_bits++) {
         struct util_fast_udiv_info m =
            util_compute_fast_udiv_info(d, n_bits, 8);

         for (unsigned n = 0; n < (1u << n_bits); n++)
            EXPECT_EQ(fast_udiv_add_sat(n, m, 8), udiv(n, d, 8));
      }
   }
}
/* Exhaustive check of the mul_add path at 8 bits: every divisor, every
 * numerator width, every numerator.
 */
TEST(fast_idiv_by_const, uint8_mul_add)
{
   /* 8-bit is small enough we can brute-force the entire space */
   for (unsigned d = 2; d < 256; d++) {
      for (unsigned n_bits = 1; n_bits <= 8; n_bits++) {
         struct util_fast_udiv_info m =
            util_compute_fast_udiv_info(d, n_bits, 8);

         for (unsigned n = 0; n < (1u << n_bits); n++)
            EXPECT_EQ(fast_udiv_mul_add(n, m, 8), udiv(n, d, 8));
      }
   }
}
/* Exhaustive check of the signed path at 8 bits.  Power-of-two (and zero)
 * divisors are skipped: they take a different path in the real optimization.
 */
TEST(fast_idiv_by_const, int8)
{
   /* 8-bit is small enough we can brute-force the entire space */
   for (int n = -128; n < 128; n++) {
      for (int d = -128; d < 128; d++) {
         if (util_is_power_of_two_or_zero(abs(d)))
            continue;

         struct util_fast_sdiv_info m =
            util_compute_fast_sdiv_info(d, 8);
         EXPECT_EQ(fast_sdiv(n, d, m, 8), sdiv(n, d, 8));
      }
   }
}
/* Randomized coverage at 16-bit width. */
TEST(fast_idiv_by_const, uint16_add_sat_bounded)
{
   random_udiv_add_sat_test(16, true);
}

TEST(fast_idiv_by_const, uint16_add_sat_full)
{
   random_udiv_add_sat_test(16, false);
}

TEST(fast_idiv_by_const, uint16_mul_add_bounded)
{
   random_udiv_mul_add_test(16, true);
}

TEST(fast_idiv_by_const, uint16_mul_add_full)
{
   random_udiv_mul_add_test(16, false);
}

TEST(fast_idiv_by_const, int16)
{
   random_sdiv_test(16);
}

/* Randomized coverage at 32-bit width. */
TEST(fast_idiv_by_const, uint32_add_sat_bounded)
{
   random_udiv_add_sat_test(32, true);
}

TEST(fast_idiv_by_const, uint32_add_sat_full)
{
   random_udiv_add_sat_test(32, false);
}

TEST(fast_idiv_by_const, uint32_mul_add_bounded)
{
   random_udiv_mul_add_test(32, true);
}

TEST(fast_idiv_by_const, uint32_mul_add_full)
{
   random_udiv_mul_add_test(32, false);
}

TEST(fast_idiv_by_const, int32)
{
   random_sdiv_test(32);
}
/* Randomized check of the shipped util_fast_udiv32() helper against plain
 * 32-bit division.
 */
TEST(fast_idiv_by_const, util_fast_udiv32)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      uint32_t n = rand_uint(32, 0);
      uint32_t d = rand_uint(32, 1);

      struct util_fast_udiv_info m =
         util_compute_fast_udiv_info(d, 32, 32);
      EXPECT_EQ(util_fast_udiv32(n, m), n / d);
   }
}
/* Randomized check of util_fast_udiv32_nuw() ("no unsigned wrap" variant). */
TEST(fast_idiv_by_const, util_fast_udiv32_nuw)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      uint32_t n = rand_uint(32, 0);
      /* NOTE(review): the nuw variant is presumably not defined for
       * n == UINT32_MAX (its internal increment would wrap), so the
       * numerator is kept below that — TODO confirm against upstream.
       */
      n = MIN2(n, UINT32_MAX - 1);
      uint32_t d = rand_uint(32, 1);

      struct util_fast_udiv_info m =
         util_compute_fast_udiv_info(d, 32, 32);
      EXPECT_EQ(util_fast_udiv32_nuw(n, m), n / d);
   }
}
/* Randomized check of util_fast_udiv32_u31_d_not_one(): numerators are
 * restricted to 31 bits and divisors to d >= 2, matching that helper's
 * documented preconditions.
 */
TEST(fast_idiv_by_const, util_fast_udiv32_u31_d_not_one)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      uint32_t n = rand_uint(31, 0);
      uint32_t d = rand_uint(31, 2);

      struct util_fast_udiv_info m =
         util_compute_fast_udiv_info(d, 31, 32);
      EXPECT_EQ(util_fast_udiv32_u31_d_not_one(n, m), n / d);
   }
}
/* Randomized coverage at 64-bit width. */
TEST(fast_idiv_by_const, uint64_add_sat_bounded)
{
   random_udiv_add_sat_test(64, true);
}

TEST(fast_idiv_by_const, uint64_add_sat_full)
{
   random_udiv_add_sat_test(64, false);
}

TEST(fast_idiv_by_const, uint64_mul_add_bounded)
{
   random_udiv_mul_add_test(64, true);
}

TEST(fast_idiv_by_const, uint64_mul_add_full)
{
   random_udiv_mul_add_test(64, false);
}

TEST(fast_idiv_by_const, int64)
{
   random_sdiv_test(64);
}