llvmpipe: Implement non-SSE4.1 versions of floor and round.
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_arit.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 /**
30 * @file
31 * Helper arithmetic functions.
32 *
33 * LLVM IR doesn't support all basic arithmetic operations we care about (most
34 * notably min/max and saturated operations), and it is often necessary to
35 * resort to machine-specific intrinsics directly. The functions here hide all
36 * these implementation details from the other modules.
37 *
38 * We also do simple expression simplification here. The reasons are:
39 * - it is very easy given we have all necessary information readily available
40 * - LLVM optimization passes fail to simplify several vector expressions
41 * - We often know value constraints which the optimization passes have no way
42 * of knowing, such as when source arguments are known to be in [0, 1] range.
43 *
44 * @author Jose Fonseca <jfonseca@vmware.com>
45 */
46
47
48 #include "util/u_memory.h"
49 #include "util/u_debug.h"
50 #include "util/u_string.h"
51 #include "util/u_cpu_detect.h"
52
53 #include "lp_bld_type.h"
54 #include "lp_bld_const.h"
55 #include "lp_bld_intr.h"
56 #include "lp_bld_logic.h"
57 #include "lp_bld_arit.h"
58
59
60 /**
61 * Generate min(a, b)
62 * No special-case checks for a or b being 0 or 1 are done.
63 */
64 static LLVMValueRef
65 lp_build_min_simple(struct lp_build_context *bld,
66 LLVMValueRef a,
67 LLVMValueRef b)
68 {
69 const struct lp_type type = bld->type;
70 const char *intrinsic = NULL;
71 LLVMValueRef cond;
72
73 /* TODO: optimize the constant case */
74
75 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
76 if(type.width * type.length == 128) {
77 if(type.floating) {
78 if(type.width == 32 && util_cpu_caps.has_sse)
79 intrinsic = "llvm.x86.sse.min.ps";
80 if(type.width == 64 && util_cpu_caps.has_sse2)
81 intrinsic = "llvm.x86.sse2.min.pd";
82 }
83 else {
84 if(type.width == 8 && !type.sign && util_cpu_caps.has_sse2)
85 intrinsic = "llvm.x86.sse2.pminu.b";
86 if(type.width == 8 && type.sign && util_cpu_caps.has_sse4_1)
87 intrinsic = "llvm.x86.sse41.pminsb";
88 if(type.width == 16 && !type.sign && util_cpu_caps.has_sse4_1)
89 intrinsic = "llvm.x86.sse41.pminuw";
90 if(type.width == 16 && type.sign && util_cpu_caps.has_sse2)
91 intrinsic = "llvm.x86.sse2.pmins.w";
92 if(type.width == 32 && !type.sign && util_cpu_caps.has_sse4_1)
93 intrinsic = "llvm.x86.sse41.pminud";
94 if(type.width == 32 && type.sign && util_cpu_caps.has_sse4_1)
95 intrinsic = "llvm.x86.sse41.pminsd";
96 }
97 }
98 #endif
99
100 if(intrinsic)
101 return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
102
103 cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
104 return lp_build_select(bld, cond, a, b);
105 }
106
107
108 /**
109 * Generate max(a, b)
110 * No special-case checks for a or b being 0 or 1 are done.
111 */
112 static LLVMValueRef
113 lp_build_max_simple(struct lp_build_context *bld,
114 LLVMValueRef a,
115 LLVMValueRef b)
116 {
117 const struct lp_type type = bld->type;
118 const char *intrinsic = NULL;
119 LLVMValueRef cond;
120
121 /* TODO: optimize the constant case */
122
123 if(type.width * type.length == 128) {
124 if(type.floating) {
125 if(type.width == 32 && util_cpu_caps.has_sse)
126 intrinsic = "llvm.x86.sse.max.ps";
127 if(type.width == 64 && util_cpu_caps.has_sse2)
128 intrinsic = "llvm.x86.sse2.max.pd";
129 }
130 else {
131 if(type.width == 8 && !type.sign && util_cpu_caps.has_sse2)
132 intrinsic = "llvm.x86.sse2.pmaxu.b";
133 if(type.width == 8 && type.sign && util_cpu_caps.has_sse4_1)
134 intrinsic = "llvm.x86.sse41.pmaxsb";
135 if(type.width == 16 && !type.sign && util_cpu_caps.has_sse4_1)
136 intrinsic = "llvm.x86.sse41.pmaxuw";
137 if(type.width == 16 && type.sign && util_cpu_caps.has_sse2)
138 intrinsic = "llvm.x86.sse2.pmaxs.w";
139 if(type.width == 32 && !type.sign && util_cpu_caps.has_sse4_1)
140 intrinsic = "llvm.x86.sse41.pmaxud";
141 if(type.width == 32 && type.sign && util_cpu_caps.has_sse4_1)
142 intrinsic = "llvm.x86.sse41.pmaxsd";
143 }
144 }
145
146 if(intrinsic)
147 return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
148
149 cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
150 return lp_build_select(bld, cond, a, b);
151 }
152
153
154 /**
155 * Generate 1 - a, or ~a depending on bld->type.
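*
* For unsigned normalized integers the two coincide: e.g. with 8 bits,
* 1.0 is represented as 255, and 255 - a has the same bit pattern as ~a.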
156 */
157 LLVMValueRef
158 lp_build_comp(struct lp_build_context *bld,
159 LLVMValueRef a)
160 {
161 const struct lp_type type = bld->type;
162
163 if(a == bld->one)
164 return bld->zero;
165 if(a == bld->zero)
166 return bld->one;
167
168 if(type.norm && !type.floating && !type.fixed && !type.sign) {
169 if(LLVMIsConstant(a))
170 return LLVMConstNot(a);
171 else
172 return LLVMBuildNot(bld->builder, a, "");
173 }
174
175 if(LLVMIsConstant(a))
176 return LLVMConstSub(bld->one, a);
177 else
178 return LLVMBuildSub(bld->builder, bld->one, a, "");
179 }
180
181
182 /**
183 * Generate a + b
184 */
185 LLVMValueRef
186 lp_build_add(struct lp_build_context *bld,
187 LLVMValueRef a,
188 LLVMValueRef b)
189 {
190 const struct lp_type type = bld->type;
191 LLVMValueRef res;
192
193 if(a == bld->zero)
194 return b;
195 if(b == bld->zero)
196 return a;
197 if(a == bld->undef || b == bld->undef)
198 return bld->undef;
199
200 if(bld->type.norm) {
201 const char *intrinsic = NULL;
202
203 if(a == bld->one || b == bld->one)
204 return bld->one;
205
206 if(util_cpu_caps.has_sse2 &&
207 type.width * type.length == 128 &&
208 !type.floating && !type.fixed) {
209 if(type.width == 8)
210 intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
211 if(type.width == 16)
212 intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
213 }
214
215 if(intrinsic)
216 return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
217 }
218
219 if(LLVMIsConstant(a) && LLVMIsConstant(b))
220 res = LLVMConstAdd(a, b);
221 else
222 res = LLVMBuildAdd(bld->builder, a, b, "");
223
224 /* clamp to ceiling of 1.0 */
225 if(bld->type.norm && (bld->type.floating || bld->type.fixed))
226 res = lp_build_min_simple(bld, res, bld->one);
227
228 /* XXX clamp to floor of -1 or 0??? */
229
230 return res;
231 }
232
233
234 /**
235 * Generate a - b
236 */
237 LLVMValueRef
238 lp_build_sub(struct lp_build_context *bld,
239 LLVMValueRef a,
240 LLVMValueRef b)
241 {
242 const struct lp_type type = bld->type;
243 LLVMValueRef res;
244
245 if(b == bld->zero)
246 return a;
247 if(a == bld->undef || b == bld->undef)
248 return bld->undef;
249 if(a == b)
250 return bld->zero;
251
252 if(bld->type.norm) {
253 const char *intrinsic = NULL;
254
255 if(b == bld->one)
256 return bld->zero;
257
258 if(util_cpu_caps.has_sse2 &&
259 type.width * type.length == 128 &&
260 !type.floating && !type.fixed) {
261 if(type.width == 8)
262 intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
263 if(type.width == 16)
264 intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
265 }
266
267 if(intrinsic)
268 return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
269 }
270
271 if(LLVMIsConstant(a) && LLVMIsConstant(b))
272 res = LLVMConstSub(a, b);
273 else
274 res = LLVMBuildSub(bld->builder, a, b, "");
275
276 if(bld->type.norm && (bld->type.floating || bld->type.fixed))
277 res = lp_build_max_simple(bld, res, bld->zero);
278
279 return res;
280 }
281
282
283 /**
284 * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions.
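*
* E.g. n = 16, lo_hi = 0 yields the mask <0, 16, 1, 17, ..., 7, 23>, which
* interleaves the low halves of the two source vectors just as PUNPCKLBW does.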
285 */
286 static LLVMValueRef
287 lp_build_unpack_shuffle(unsigned n, unsigned lo_hi)
288 {
289 LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
290 unsigned i, j;
291
292 assert(n <= LP_MAX_VECTOR_LENGTH);
293 assert(lo_hi < 2);
294
295 for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) {
296 elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0);
297 elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0);
298 }
299
300 return LLVMConstVector(elems, n);
301 }
302
303
304 /**
305 * Build constant int vector of width 'n' and value 'c'.
306 */
307 static LLVMValueRef
308 lp_build_const_vec(LLVMTypeRef type, unsigned n, long long c)
309 {
310 LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
311 unsigned i;
312
313 assert(n <= LP_MAX_VECTOR_LENGTH);
314
315 for(i = 0; i < n; ++i)
316 elems[i] = LLVMConstInt(type, c, 0);
317
318 return LLVMConstVector(elems, n);
319 }
320
321
322 /**
323 * Normalized 8bit multiplication.
324 *
325 * - alpha plus one
326 *
327 * makes the following approximation to the division (Sree)
328 *
329 * a*b/255 ~= (a*(b + 1)) >> 8
330 *
331 * which is the fastest method that satisfies the following OpenGL criteria
332 *
333 * 0*0 = 0 and 255*255 = 255
334 *
335 * - geometric series
336 *
337 * takes the geometric series approximation to the division
338 *
339 * t/255 = (t >> 8) + (t >> 16) + (t >> 24) + ...
340 *
341 * in this case just the first two terms, to fit in 16-bit arithmetic
342 *
343 * t/255 ~= (t + (t >> 8)) >> 8
344 *
345 * note that by itself this doesn't satisfy the OpenGL criteria, as it
346 * yields 255*255 = 254, so either the special case b = 255 must be
347 * accounted for or roundoff must be used
348 *
349 * - geometric series plus rounding
350 *
351 * when using the geometric series division, instead of truncating the
352 * result use roundoff in the approximation (Jim Blinn)
353 *
354 * t/255 ~= (t + (t >> 8) + 0x80) >> 8
355 *
356 * which achieves exact results
357 *
358 * @sa Alvy Ray Smith, Image Compositing Fundamentals, Tech Memo 4, Aug 15, 1995,
359 * ftp://ftp.alvyray.com/Acrobat/4_Comp.pdf
360 * @sa Michael Herf, The "double blend trick", May 2000,
361 * http://www.stereopsis.com/doubleblend.html
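*
* A couple of hand-checked examples of the rounded variant:
*
* a = b = 255: t = 65025, (65025 + 254 + 0x80) >> 8 = 65407 >> 8 = 255
* a = 255, b = 1: t = 255, (255 + 0 + 0x80) >> 8 = 383 >> 8 = 1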
362 */
363 static LLVMValueRef
364 lp_build_mul_u8n(LLVMBuilderRef builder,
365 LLVMValueRef a, LLVMValueRef b)
366 {
367 static LLVMValueRef c01 = NULL;
368 static LLVMValueRef c08 = NULL;
369 static LLVMValueRef c80 = NULL;
370 LLVMValueRef ab;
371
372 if(!c01) c01 = lp_build_const_vec(LLVMInt16Type(), 8, 0x01);
373 if(!c08) c08 = lp_build_const_vec(LLVMInt16Type(), 8, 0x08);
374 if(!c80) c80 = lp_build_const_vec(LLVMInt16Type(), 8, 0x80);
375
376 #if 0
377
378 /* a*b/255 ~= (a*(b + 1)) >> 8 */
379 b = LLVMBuildAdd(builder, b, c01, "");
380 ab = LLVMBuildMul(builder, a, b, "");
381
382 #else
383
384 /* t/255 ~= (t + (t >> 8) + 0x80) >> 8 */
385 ab = LLVMBuildMul(builder, a, b, "");
386 ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c08, ""), "");
387 ab = LLVMBuildAdd(builder, ab, c80, "");
388
389 #endif
390
391 ab = LLVMBuildLShr(builder, ab, c08, "");
392
393 return ab;
394 }
395
396
397 /**
398 * Generate a * b
399 */
400 LLVMValueRef
401 lp_build_mul(struct lp_build_context *bld,
402 LLVMValueRef a,
403 LLVMValueRef b)
404 {
405 const struct lp_type type = bld->type;
406
407 if(a == bld->zero)
408 return bld->zero;
409 if(a == bld->one)
410 return b;
411 if(b == bld->zero)
412 return bld->zero;
413 if(b == bld->one)
414 return a;
415 if(a == bld->undef || b == bld->undef)
416 return bld->undef;
417
418 if(!type.floating && !type.fixed && type.norm) {
419 if(util_cpu_caps.has_sse2 && type.width == 8 && type.length == 16) {
420 LLVMTypeRef i16x8 = LLVMVectorType(LLVMInt16Type(), 8);
421 LLVMTypeRef i8x16 = LLVMVectorType(LLVMInt8Type(), 16);
422 static LLVMValueRef ml = NULL;
423 static LLVMValueRef mh = NULL;
424 LLVMValueRef al, ah, bl, bh;
425 LLVMValueRef abl, abh;
426 LLVMValueRef ab;
427
428 if(!ml) ml = lp_build_unpack_shuffle(16, 0);
429 if(!mh) mh = lp_build_unpack_shuffle(16, 1);
430
431 /* PUNPCKLBW, PUNPCKHBW */
432 al = LLVMBuildShuffleVector(bld->builder, a, bld->zero, ml, "");
433 bl = LLVMBuildShuffleVector(bld->builder, b, bld->zero, ml, "");
434 ah = LLVMBuildShuffleVector(bld->builder, a, bld->zero, mh, "");
435 bh = LLVMBuildShuffleVector(bld->builder, b, bld->zero, mh, "");
436
437 /* NOP */
438 al = LLVMBuildBitCast(bld->builder, al, i16x8, "");
439 bl = LLVMBuildBitCast(bld->builder, bl, i16x8, "");
440 ah = LLVMBuildBitCast(bld->builder, ah, i16x8, "");
441 bh = LLVMBuildBitCast(bld->builder, bh, i16x8, "");
442
443 /* PMULLW, PSRLW, PADDW */
444 abl = lp_build_mul_u8n(bld->builder, al, bl);
445 abh = lp_build_mul_u8n(bld->builder, ah, bh);
446
447 /* PACKUSWB */
448 ab = lp_build_intrinsic_binary(bld->builder, "llvm.x86.sse2.packuswb.128", i16x8, abl, abh);
449
450 /* NOP */
451 ab = LLVMBuildBitCast(bld->builder, ab, i8x16, "");
452
453 return ab;
454 }
455
456 /* FIXME */
457 assert(0);
458 }
459
460 if(LLVMIsConstant(a) && LLVMIsConstant(b))
461 return LLVMConstMul(a, b);
462
463 return LLVMBuildMul(bld->builder, a, b, "");
464 }
465
466
467 /**
468 * Generate a / b
469 */
470 LLVMValueRef
471 lp_build_div(struct lp_build_context *bld,
472 LLVMValueRef a,
473 LLVMValueRef b)
474 {
475 const struct lp_type type = bld->type;
476
477 if(a == bld->zero)
478 return bld->zero;
479 if(a == bld->one)
480 return lp_build_rcp(bld, b);
481 if(b == bld->zero)
482 return bld->undef;
483 if(b == bld->one)
484 return a;
485 if(a == bld->undef || b == bld->undef)
486 return bld->undef;
487
488 if(LLVMIsConstant(a) && LLVMIsConstant(b))
489 return LLVMConstFDiv(a, b);
490
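/* With SSE, trade precision for speed: a/b ~= a * rcp(b). RCPPS is only
* accurate to about 12 bits; see the FIXME in lp_build_rcp() below. */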
491 if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
492 return lp_build_mul(bld, a, lp_build_rcp(bld, b));
493
494 return LLVMBuildFDiv(bld->builder, a, b, "");
495 }
496
497
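/**
* Generate linear interpolation: v0 + x * (v1 - v0).
*/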
498 LLVMValueRef
499 lp_build_lerp(struct lp_build_context *bld,
500 LLVMValueRef x,
501 LLVMValueRef v0,
502 LLVMValueRef v1)
503 {
504 return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0)));
505 }
506
507
508 LLVMValueRef
509 lp_build_lerp_2d(struct lp_build_context *bld,
510 LLVMValueRef x,
511 LLVMValueRef y,
512 LLVMValueRef v00,
513 LLVMValueRef v01,
514 LLVMValueRef v10,
515 LLVMValueRef v11)
516 {
517 LLVMValueRef v0 = lp_build_lerp(bld, x, v00, v01);
518 LLVMValueRef v1 = lp_build_lerp(bld, x, v10, v11);
519 return lp_build_lerp(bld, y, v0, v1);
520 }
521
522
523 /**
524 * Generate min(a, b)
525 * Do checks for special cases.
526 */
527 LLVMValueRef
528 lp_build_min(struct lp_build_context *bld,
529 LLVMValueRef a,
530 LLVMValueRef b)
531 {
532 if(a == bld->undef || b == bld->undef)
533 return bld->undef;
534
535 if(a == b)
536 return a;
537
538 if(bld->type.norm) {
539 if(a == bld->zero || b == bld->zero)
540 return bld->zero;
541 if(a == bld->one)
542 return b;
543 if(b == bld->one)
544 return a;
545 }
546
547 return lp_build_min_simple(bld, a, b);
548 }
549
550
551 /**
552 * Generate max(a, b)
553 * Do checks for special cases.
554 */
555 LLVMValueRef
556 lp_build_max(struct lp_build_context *bld,
557 LLVMValueRef a,
558 LLVMValueRef b)
559 {
560 if(a == bld->undef || b == bld->undef)
561 return bld->undef;
562
563 if(a == b)
564 return a;
565
566 if(bld->type.norm) {
567 if(a == bld->one || b == bld->one)
568 return bld->one;
569 if(a == bld->zero)
570 return b;
571 if(b == bld->zero)
572 return a;
573 }
574
575 return lp_build_max_simple(bld, a, b);
576 }
577
578
579 /**
580 * Generate abs(a)
581 */
582 LLVMValueRef
583 lp_build_abs(struct lp_build_context *bld,
584 LLVMValueRef a)
585 {
586 const struct lp_type type = bld->type;
587 LLVMTypeRef vec_type = lp_build_vec_type(type);
588
589 if(!type.sign)
590 return a;
591
592 if(type.floating) {
593 /* Mask out the sign bit */
594 LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
595 LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long)1 << (type.width - 1)) - 1);
596 a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
597 a = LLVMBuildAnd(bld->builder, a, mask, "");
598 a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
599 return a;
600 }
601
602 if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) {
603 switch(type.width) {
604 case 8:
605 return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a);
606 case 16:
607 return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a);
608 case 32:
609 return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a);
610 }
611 }
612
613 return lp_build_max(bld, a, LLVMBuildNeg(bld->builder, a, ""));
614 }
615
616
617 LLVMValueRef
618 lp_build_sgn(struct lp_build_context *bld,
619 LLVMValueRef a)
620 {
621 const struct lp_type type = bld->type;
622 LLVMTypeRef vec_type = lp_build_vec_type(type);
623 LLVMValueRef cond;
624 LLVMValueRef res;
625
626 /* Handle non-zero case */
627 if(!type.sign) {
628 /* if not zero then sign must be positive */
629 res = bld->one;
630 }
631 else if(type.floating) {
632 /* Take the sign bit and add it to 1 constant */
633 LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
634 LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
635 LLVMValueRef sign;
636 LLVMValueRef one;
637 sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
638 sign = LLVMBuildAnd(bld->builder, sign, mask, "");
639 one = LLVMConstBitCast(bld->one, int_vec_type);
640 res = LLVMBuildOr(bld->builder, sign, one, "");
641 res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
642 }
643 else
644 {
645 LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0);
646 cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero);
647 res = lp_build_select(bld, cond, bld->one, minus_one);
648 }
649
650 /* Handle zero */
651 cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero);
652 res = lp_build_select(bld, cond, bld->zero, res);
653
654 return res;
655 }
656
657
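/**
* These values match the rounding-mode immediate operand of the SSE4.1
* ROUNDPS/ROUNDPD instructions (with the exception-control bits left at zero).
*/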
658 enum lp_build_round_sse41_mode
659 {
660 LP_BUILD_ROUND_SSE41_NEAREST = 0,
661 LP_BUILD_ROUND_SSE41_FLOOR = 1,
662 LP_BUILD_ROUND_SSE41_CEIL = 2,
663 LP_BUILD_ROUND_SSE41_TRUNCATE = 3
664 };
665
666
667 static INLINE LLVMValueRef
668 lp_build_round_sse41(struct lp_build_context *bld,
669 LLVMValueRef a,
670 enum lp_build_round_sse41_mode mode)
671 {
672 const struct lp_type type = bld->type;
673 LLVMTypeRef vec_type = lp_build_vec_type(type);
674 const char *intrinsic;
675
676 assert(type.floating);
677 assert(type.width*type.length == 128);
678 assert(lp_check_value(type, a));
679 assert(util_cpu_caps.has_sse4_1);
680
681 switch(type.width) {
682 case 32:
683 intrinsic = "llvm.x86.sse41.round.ps";
684 break;
685 case 64:
686 intrinsic = "llvm.x86.sse41.round.pd";
687 break;
688 default:
689 assert(0);
690 return bld->undef;
691 }
692
693 return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a,
694 LLVMConstInt(LLVMInt32Type(), mode, 0));
695 }
696
697
698 LLVMValueRef
699 lp_build_trunc(struct lp_build_context *bld,
700 LLVMValueRef a)
701 {
702 const struct lp_type type = bld->type;
703
704 assert(type.floating);
705 assert(lp_check_value(type, a));
706
707 if(util_cpu_caps.has_sse4_1)
708 return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE);
709 else {
710 LLVMTypeRef vec_type = lp_build_vec_type(type);
711 LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
712 LLVMValueRef res;
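/* FPToSI rounds toward zero, which is exactly trunc(); values outside
* the integer type's range are not handled by this fallback. */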
713 res = LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
714 res = LLVMBuildSIToFP(bld->builder, res, vec_type, "");
715 return res;
716 }
717 }
718
719
720 LLVMValueRef
721 lp_build_round(struct lp_build_context *bld,
722 LLVMValueRef a)
723 {
724 const struct lp_type type = bld->type;
725
726 assert(type.floating);
727 assert(lp_check_value(type, a));
728
729 if(util_cpu_caps.has_sse4_1)
730 return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
731 else {
732 LLVMTypeRef vec_type = lp_build_vec_type(type);
733 LLVMValueRef res;
734 res = lp_build_iround(bld, a);
735 res = LLVMBuildSIToFP(bld->builder, res, vec_type, "");
736 return res;
737 }
738 }
739
740
741 LLVMValueRef
742 lp_build_floor(struct lp_build_context *bld,
743 LLVMValueRef a)
744 {
745 const struct lp_type type = bld->type;
746
747 assert(type.floating);
748 assert(lp_check_value(type, a));

749 if(util_cpu_caps.has_sse4_1)
750 return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
751 else {
752 LLVMTypeRef vec_type = lp_build_vec_type(type);
753 LLVMValueRef res;
754 res = lp_build_ifloor(bld, a);
755 res = LLVMBuildSIToFP(bld->builder, res, vec_type, "");
756 return res;
757 }
758 }
759
760
761 LLVMValueRef
762 lp_build_ceil(struct lp_build_context *bld,
763 LLVMValueRef a)
764 {
765 const struct lp_type type = bld->type;
766
767 assert(type.floating);
768 assert(lp_check_value(type, a));
769
770 if(util_cpu_caps.has_sse4_1)
771 return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
772 else {
773 LLVMTypeRef vec_type = lp_build_vec_type(type);
774 LLVMValueRef res;
775 res = lp_build_iceil(bld, a);
776 res = LLVMBuildSIToFP(bld->builder, res, vec_type, "");
777 return res;
778 }
779 }
780
781
782 /**
783 * Convert to integer through whichever rounding method is fastest,
784 * typically truncating to zero.
785 */
786 LLVMValueRef
787 lp_build_itrunc(struct lp_build_context *bld,
788 LLVMValueRef a)
789 {
790 const struct lp_type type = bld->type;
791 LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
792
793 assert(type.floating);
794 assert(lp_check_value(type, a));
795
796 return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
797 }
798
799
800 LLVMValueRef
801 lp_build_iround(struct lp_build_context *bld,
802 LLVMValueRef a)
803 {
804 const struct lp_type type = bld->type;
805 LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
806 LLVMValueRef res;
807
808 assert(type.floating);
809 assert(lp_check_value(type, a));
810
811 if(util_cpu_caps.has_sse4_1) {
812 res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
813 }
814 else {
815 LLVMTypeRef vec_type = lp_build_vec_type(type);
816 LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
817 LLVMValueRef sign;
818 LLVMValueRef half;
819
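/*
* Copy the sign of 'a' into 0.5 and add it before the truncating
* conversion, giving round-half-away-from-zero: e.g. a = 2.6 becomes
* 2.6 + 0.5 = 3.1 -> 3, and a = -2.6 becomes -2.6 - 0.5 = -3.1 -> -3.
*/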
820 /* get sign bit */
821 sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
822 sign = LLVMBuildAnd(bld->builder, sign, mask, "");
823
824 /* sign * 0.5 */
825 half = lp_build_const_scalar(type, 0.5);
826 half = LLVMBuildBitCast(bld->builder, half, int_vec_type, "");
827 half = LLVMBuildOr(bld->builder, sign, half, "");
828 half = LLVMBuildBitCast(bld->builder, half, vec_type, "");
829
830 res = LLVMBuildAdd(bld->builder, a, half, "");
831 }
832
833 res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
834
835 return res;
836 }
837
838
839 LLVMValueRef
840 lp_build_ifloor(struct lp_build_context *bld,
841 LLVMValueRef a)
842 {
843 const struct lp_type type = bld->type;
844 LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
845 LLVMValueRef res;
846
847 assert(type.floating);
848 assert(lp_check_value(type, a));
849
850 if(util_cpu_caps.has_sse4_1) {
851 res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
852 }
853 else {
854 /* Turn round-toward-zero into floor by offsetting negative inputs */
855 LLVMTypeRef vec_type = lp_build_vec_type(type);
856 unsigned mantissa = lp_mantissa(type);
857 LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
858 LLVMValueRef sign;
859 LLVMValueRef offset;
860
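/*
* E.g. a = -0.25 becomes -1.2499(9) and truncates to -1 = floor(-0.25),
* while a = -2.0 becomes -2.9999(9) and still truncates to -2; non-negative
* inputs get a zero offset, where truncation already equals floor().
*/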
861 /* sign = a < 0 ? ~0 : 0 */
862 sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
863 sign = LLVMBuildAnd(bld->builder, sign, mask, "");
864 sign = LLVMBuildAShr(bld->builder, sign, lp_build_int_const_scalar(type, type.width - 1), "");
865
866 /* offset = -0.99999(9)f */
867 offset = lp_build_const_scalar(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa));
868 offset = LLVMConstBitCast(offset, int_vec_type);
869
870 /* offset = a < 0 ? -0.99999(9)f : 0.0f */
871 offset = LLVMBuildAnd(bld->builder, offset, sign, "");
872 offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "");
873
874 res = LLVMBuildAdd(bld->builder, a, offset, "");
875 }
876
877 res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
878
879 return res;
880 }
881
882
883 LLVMValueRef
884 lp_build_iceil(struct lp_build_context *bld,
885 LLVMValueRef a)
886 {
887 const struct lp_type type = bld->type;
888 LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
889 LLVMValueRef res;
890
891 assert(type.floating);
892 assert(lp_check_value(type, a));
893
894 if(util_cpu_caps.has_sse4_1) {
895 res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
896 }
897 else {
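/*
* FIXME: the non-SSE4.1 path is still missing here. A possible sketch,
* mirroring lp_build_ifloor above (untested): offset positive inputs by
* 0.99999(9) using sign = a > 0 ? ~0 : 0, then truncate.
*/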
898 assert(0);
899 res = bld->undef;
900 }
901
902 res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
903
904 return res;
905 }
906
907
908 LLVMValueRef
909 lp_build_sqrt(struct lp_build_context *bld,
910 LLVMValueRef a)
911 {
912 const struct lp_type type = bld->type;
913 LLVMTypeRef vec_type = lp_build_vec_type(type);
914 char intrinsic[32];
915
916 /* TODO: optimize the constant case */
918
919 assert(type.floating);
920 util_snprintf(intrinsic, sizeof intrinsic, "llvm.sqrt.v%uf%u", type.length, type.width);
921
922 return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a);
923 }
924
925
926 LLVMValueRef
927 lp_build_rcp(struct lp_build_context *bld,
928 LLVMValueRef a)
929 {
930 const struct lp_type type = bld->type;
931
932 if(a == bld->zero)
933 return bld->undef;
934 if(a == bld->one)
935 return bld->one;
936 if(a == bld->undef)
937 return bld->undef;
938
939 assert(type.floating);
940
941 if(LLVMIsConstant(a))
942 return LLVMConstFDiv(bld->one, a);
943
944 if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
945 /* FIXME: improve precision */
946 return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
947
948 return LLVMBuildFDiv(bld->builder, bld->one, a, "");
949 }
950
951
952 /**
953 * Generate 1/sqrt(a)
954 */
955 LLVMValueRef
956 lp_build_rsqrt(struct lp_build_context *bld,
957 LLVMValueRef a)
958 {
959 const struct lp_type type = bld->type;
960
961 assert(type.floating);
962
963 if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
964 return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", lp_build_vec_type(type), a);
965
966 return lp_build_rcp(bld, lp_build_sqrt(bld, a));
967 }
968
969
970 /**
971 * Generate cos(a)
972 */
973 LLVMValueRef
974 lp_build_cos(struct lp_build_context *bld,
975 LLVMValueRef a)
976 {
977 const struct lp_type type = bld->type;
978 LLVMTypeRef vec_type = lp_build_vec_type(type);
979 char intrinsic[32];
980
981 /* TODO: optimize the constant case */
982
983 assert(type.floating);
984 util_snprintf(intrinsic, sizeof intrinsic, "llvm.cos.v%uf%u", type.length, type.width);
985
986 return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a);
987 }
988
989
990 /**
991 * Generate sin(a)
992 */
993 LLVMValueRef
994 lp_build_sin(struct lp_build_context *bld,
995 LLVMValueRef a)
996 {
997 const struct lp_type type = bld->type;
998 LLVMTypeRef vec_type = lp_build_vec_type(type);
999 char intrinsic[32];
1000
1001 /* TODO: optimize the constant case */
1002
1003 assert(type.floating);
1004 util_snprintf(intrinsic, sizeof intrinsic, "llvm.sin.v%uf%u", type.length, type.width);
1005
1006 return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a);
1007 }
1008
1009
1010 /**
1011 * Generate pow(x, y)
1012 */
1013 LLVMValueRef
1014 lp_build_pow(struct lp_build_context *bld,
1015 LLVMValueRef x,
1016 LLVMValueRef y)
1017 {
1018 /* TODO: optimize the constant case */
1019 if(LLVMIsConstant(x) && LLVMIsConstant(y))
1020 debug_printf("%s: inefficient/imprecise constant arithmetic\n",
1021 __FUNCTION__);
1022
1023 return lp_build_exp2(bld, lp_build_mul(bld, lp_build_log2(bld, x), y));
1024 }
1025
1026
1027 /**
1028 * Generate exp(x)
1029 */
1030 LLVMValueRef
1031 lp_build_exp(struct lp_build_context *bld,
1032 LLVMValueRef x)
1033 {
1034 /* log2(e) = 1/log(2) */
1035 LLVMValueRef log2e = lp_build_const_scalar(bld->type, 1.4426950408889634);
1036
1037 return lp_build_exp2(bld, lp_build_mul(bld, log2e, x));
1038 }
1039
1040
1041 /**
1042 * Generate log(x)
1043 */
1044 LLVMValueRef
1045 lp_build_log(struct lp_build_context *bld,
1046 LLVMValueRef x)
1047 {
1048 /* log(2) */
1049 LLVMValueRef log2 = lp_build_const_scalar(bld->type, 0.69314718055994529);
1050
1051 return lp_build_mul(bld, log2, lp_build_log2(bld, x));
1052 }
1053
1054
1055 #define EXP_POLY_DEGREE 3
1056 #define LOG_POLY_DEGREE 5
1057
1058
1059 /**
1060 * Generate polynomial.
1061 * Ex: coeffs[0] + x * coeffs[1] + x^2 * coeffs[2].
1062 */
1063 static LLVMValueRef
1064 lp_build_polynomial(struct lp_build_context *bld,
1065 LLVMValueRef x,
1066 const double *coeffs,
1067 unsigned num_coeffs)
1068 {
1069 const struct lp_type type = bld->type;
1070 LLVMValueRef res = NULL;
1071 unsigned i;
1072
1073 /* TODO: optimize the constant case */
1074 if(LLVMIsConstant(x))
1075 debug_printf("%s: inefficient/imprecise constant arithmetic\n",
1076 __FUNCTION__);
1077
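/* Evaluate with Horner's scheme, starting from the highest-degree
* coefficient: coeffs[0] + x*(coeffs[1] + x*(coeffs[2] + ...)). */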
1078 for (i = num_coeffs; i--; ) {
1079 LLVMValueRef coeff = lp_build_const_scalar(type, coeffs[i]);
1080 if(res)
1081 res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res));
1082 else
1083 res = coeff;
1084 }
1085
1086 if(res)
1087 return res;
1088 else
1089 return bld->undef;
1090 }
1091
1092
1093 /**
1094 * Minimax polynomial fit of 2**x, in range [-0.5, 0.5[
1095 */
1096 const double lp_build_exp2_polynomial[] = {
1097 #if EXP_POLY_DEGREE == 5
1098 9.9999994e-1, 6.9315308e-1, 2.4015361e-1, 5.5826318e-2, 8.9893397e-3, 1.8775767e-3
1099 #elif EXP_POLY_DEGREE == 4
1100 1.0000026, 6.9300383e-1, 2.4144275e-1, 5.2011464e-2, 1.3534167e-2
1101 #elif EXP_POLY_DEGREE == 3
1102 9.9992520e-1, 6.9583356e-1, 2.2606716e-1, 7.8024521e-2
1103 #elif EXP_POLY_DEGREE == 2
1104 1.0017247, 6.5763628e-1, 3.3718944e-1
1105 #else
1106 #error
1107 #endif
1108 };
1109
1110
1111 void
1112 lp_build_exp2_approx(struct lp_build_context *bld,
1113 LLVMValueRef x,
1114 LLVMValueRef *p_exp2_int_part,
1115 LLVMValueRef *p_frac_part,
1116 LLVMValueRef *p_exp2)
1117 {
1118 const struct lp_type type = bld->type;
1119 LLVMTypeRef vec_type = lp_build_vec_type(type);
1120 LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
1121 LLVMValueRef ipart = NULL;
1122 LLVMValueRef fpart = NULL;
1123 LLVMValueRef expipart = NULL;
1124 LLVMValueRef expfpart = NULL;
1125 LLVMValueRef res = NULL;
1126
1127 if(p_exp2_int_part || p_frac_part || p_exp2) {
1128 /* TODO: optimize the constant case */
1129 if(LLVMIsConstant(x))
1130 debug_printf("%s: inefficient/imprecise constant arithmetic\n",
1131 __FUNCTION__);
1132
1133 assert(type.floating && type.width == 32);
1134
1135 x = lp_build_min(bld, x, lp_build_const_scalar(type, 129.0));
1136 x = lp_build_max(bld, x, lp_build_const_scalar(type, -126.99999));
1137
1138 /* ipart = int(x - 0.5) */
1139 ipart = LLVMBuildSub(bld->builder, x, lp_build_const_scalar(type, 0.5f), "");
1140 ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, "");
1141
1142 /* fpart = x - ipart */
1143 fpart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, "");
1144 fpart = LLVMBuildSub(bld->builder, x, fpart, "");
1145 }
1146
1147 if(p_exp2_int_part || p_exp2) {
1148 /* expipart = (float) (1 << ipart) */
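/* Building 2^ipart directly: (ipart + 127) goes into the exponent field
* of an IEEE single, e.g. ipart = 3 gives (3 + 127) << 23 = 0x41000000,
* which is the bit pattern of 8.0f. */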
1149 expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_scalar(type, 127), "");
1150 expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_scalar(type, 23), "");
1151 expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, "");
1152 }
1153
1154 if(p_exp2) {
1155 expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
1156 Elements(lp_build_exp2_polynomial));
1157
1158 res = LLVMBuildMul(bld->builder, expipart, expfpart, "");
1159 }
1160
1161 if(p_exp2_int_part)
1162 *p_exp2_int_part = expipart;
1163
1164 if(p_frac_part)
1165 *p_frac_part = fpart;
1166
1167 if(p_exp2)
1168 *p_exp2 = res;
1169 }
1170
1171
1172 LLVMValueRef
1173 lp_build_exp2(struct lp_build_context *bld,
1174 LLVMValueRef x)
1175 {
1176 LLVMValueRef res;
1177 lp_build_exp2_approx(bld, x, NULL, NULL, &res);
1178 return res;
1179 }
1180
1181
1182 /**
1183 * Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[
1184 * These coefficients can be generated with
1185 * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html
1186 */
1187 const double lp_build_log2_polynomial[] = {
1188 #if LOG_POLY_DEGREE == 6
1189 3.11578814719469302614, -3.32419399085241980044, 2.59883907202499966007, -1.23152682416275988241, 0.318212422185251071475, -0.0344359067839062357313
1190 #elif LOG_POLY_DEGREE == 5
1191 2.8882704548164776201, -2.52074962577807006663, 1.48116647521213171641, -0.465725644288844778798, 0.0596515482674574969533
1192 #elif LOG_POLY_DEGREE == 4
1193 2.61761038894603480148, -1.75647175389045657003, 0.688243882994381274313, -0.107254423828329604454
1194 #elif LOG_POLY_DEGREE == 3
1195 2.28330284476918490682, -1.04913055217340124191, 0.204446009836232697516
1196 #else
1197 #error
1198 #endif
1199 };
1200
1201
1202 /**
1203 * See http://www.devmaster.net/forums/showthread.php?p=43580
1204 */
1205 void
1206 lp_build_log2_approx(struct lp_build_context *bld,
1207 LLVMValueRef x,
1208 LLVMValueRef *p_exp,
1209 LLVMValueRef *p_floor_log2,
1210 LLVMValueRef *p_log2)
1211 {
1212 const struct lp_type type = bld->type;
1213 LLVMTypeRef vec_type = lp_build_vec_type(type);
1214 LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
1215
1216 LLVMValueRef expmask = lp_build_int_const_scalar(type, 0x7f800000);
1217 LLVMValueRef mantmask = lp_build_int_const_scalar(type, 0x007fffff);
1218 LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type);
1219
1220 LLVMValueRef i = NULL;
1221 LLVMValueRef exp = NULL;
1222 LLVMValueRef mant = NULL;
1223 LLVMValueRef logexp = NULL;
1224 LLVMValueRef logmant = NULL;
1225 LLVMValueRef res = NULL;
1226
1227 if(p_exp || p_floor_log2 || p_log2) {
1228 /* TODO: optimize the constant case */
1229 if(LLVMIsConstant(x))
1230 debug_printf("%s: inefficient/imprecise constant arithmetic\n",
1231 __FUNCTION__);
1232
1233 assert(type.floating && type.width == 32);
1234
1235 i = LLVMBuildBitCast(bld->builder, x, int_vec_type, "");
1236
1237 /* exp = the (biased, in-place) exponent bits of x */
1238 exp = LLVMBuildAnd(bld->builder, i, expmask, "");
1239 }
1240
1241 if(p_floor_log2 || p_log2) {
1242 logexp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_scalar(type, 23), "");
1243 logexp = LLVMBuildSub(bld->builder, logexp, lp_build_int_const_scalar(type, 127), "");
1244 logexp = LLVMBuildSIToFP(bld->builder, logexp, vec_type, "");
1245 }
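/*
* Sanity check: x = 8.0f = 0x41000000 gives exp = 0x41000000 and
* logexp = (0x41000000 >> 23) - 127 = 130 - 127 = 3 = floor(log2(8.0)).
*/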
1246
1247 if(p_log2) {
1248 /* mant = the mantissa of x as a float in [1, 2[ */
1249 mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
1250 mant = LLVMBuildOr(bld->builder, mant, one, "");
1251 mant = LLVMBuildBitCast(bld->builder, mant, vec_type, "");
1252
1253 logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
1254 Elements(lp_build_log2_polynomial));
1255
1256 /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0 */
1257 logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");
1258
1259 res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
1260 }
1261
1262 if(p_exp)
1263 *p_exp = exp;
1264
1265 if(p_floor_log2)
1266 *p_floor_log2 = logexp;
1267
1268 if(p_log2)
1269 *p_log2 = res;
1270 }
1271
1272
1273 LLVMValueRef
1274 lp_build_log2(struct lp_build_context *bld,
1275 LLVMValueRef x)
1276 {
1277 LLVMValueRef res;
1278 lp_build_log2_approx(bld, x, NULL, NULL, &res);
1279 return res;
1280 }