src/gallium/drivers/llvmpipe/lp_bld_arit.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28
  29 /**
  30  * @file
  31  * Helper
  32  *
 * LLVM IR doesn't support all basic arithmetic operations we care about (most
 * notably min/max and saturated operations), and it is often necessary to
 * resort to machine-specific intrinsics directly. The functions here hide all
 * these implementation details from the other modules.
  37  *
 * We also do simple expression simplification here. The reasons are:
  39  * - it is very easy given we have all necessary information readily available
  40  * - LLVM optimization passes fail to simplify several vector expressions
  41  * - We often know value constraints which the optimization passes have no way
  42  *   of knowing, such as when source arguments are known to be in [0, 1] range.
  43  *
  44  * @author Jose Fonseca <jfonseca@vmware.com>
  45  */
  46
  47
  48 #include "util/u_debug.h"
  49 #include "util/u_string.h"
  50
  51 #include "lp_bld_type.h"
  52 #include "lp_bld_const.h"
  53 #include "lp_bld_intr.h"
  54 #include "lp_bld_arit.h"
  55
  56
  57 static LLVMValueRef
  58 lp_build_min_simple(struct lp_build_context *bld,
  59                     LLVMValueRef a,
  60                     LLVMValueRef b)
  61 {
  62    const union lp_type type = bld->type;
  63    const char *intrinsic = NULL;
  64    LLVMValueRef cond;
  65
  66    /* TODO: optimize the constant case */
  67
  68 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
  69    if(type.width * type.length == 128) {
  70       if(type.floating) {
  71          if(type.width == 32)
  72             intrinsic = "llvm.x86.sse.min.ps";
  73          if(type.width == 64)
  74             intrinsic = "llvm.x86.sse2.min.pd";
  75       }
  76       else {
  77          if(type.width == 8 && !type.sign)
  78             intrinsic = "llvm.x86.sse2.pminu.b";
  79          if(type.width == 8 && type.sign)
  80             intrinsic = "llvm.x86.sse41.pminsb";
  81          if(type.width == 16 && !type.sign)
  82             intrinsic = "llvm.x86.sse41.pminuw";
  83          if(type.width == 16 && type.sign)
  84             intrinsic = "llvm.x86.sse2.pmins.w";
  85          if(type.width == 32 && !type.sign)
  86             intrinsic = "llvm.x86.sse41.pminud";
  87          if(type.width == 32 && type.sign)
  88             intrinsic = "llvm.x86.sse41.pminsd";
  89       }
  90    }
  91 #endif
  92
  93    if(intrinsic)
  94       return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
  95
  96    if(type.floating)
  97       cond = LLVMBuildFCmp(bld->builder, LLVMRealULT, a, b, "");
  98    else
  99       cond = LLVMBuildICmp(bld->builder, type.sign ? LLVMIntSLT : LLVMIntULT, a, b, "");
 100    return LLVMBuildSelect(bld->builder, cond, a, b, "");
 101 }
 102
 103
 104 static LLVMValueRef
 105 lp_build_max_simple(struct lp_build_context *bld,
 106                     LLVMValueRef a,
 107                     LLVMValueRef b)
 108 {
 109    const union lp_type type = bld->type;
 110    const char *intrinsic = NULL;
 111    LLVMValueRef cond;
 112
 113    /* TODO: optimize the constant case */
 114
 115 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 116    if(type.width * type.length == 128) {
 117       if(type.floating) {
 118          if(type.width == 32)
 119             intrinsic = "llvm.x86.sse.max.ps";
 120          if(type.width == 64)
 121             intrinsic = "llvm.x86.sse2.max.pd";
 122       }
 123       else {
 124          if(type.width == 8 && !type.sign)
 125             intrinsic = "llvm.x86.sse2.pmaxu.b";
 126          if(type.width == 8 && type.sign)
 127             intrinsic = "llvm.x86.sse41.pmaxsb";
 128          if(type.width == 16 && !type.sign)
 129             intrinsic = "llvm.x86.sse41.pmaxuw";
 130          if(type.width == 16 && type.sign)
 131             intrinsic = "llvm.x86.sse2.pmaxs.w";
 132          if(type.width == 32 && !type.sign)
 133             intrinsic = "llvm.x86.sse41.pmaxud";
 134          if(type.width == 32 && type.sign)
 135             intrinsic = "llvm.x86.sse41.pmaxsd";
 136       }
 137    }
 138 #endif
 139
 140    if(intrinsic)
 141       return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
 142
 143    if(type.floating)
 144       cond = LLVMBuildFCmp(bld->builder, LLVMRealULT, a, b, "");
 145    else
 146       cond = LLVMBuildICmp(bld->builder, type.sign ? LLVMIntSLT : LLVMIntULT, a, b, "");
 147    return LLVMBuildSelect(bld->builder, cond, b, a, "");
 148 }
 149
 150
 151 LLVMValueRef
 152 lp_build_comp(struct lp_build_context *bld,
 153               LLVMValueRef a)
 154 {
 155    const union lp_type type = bld->type;
 156
 157    if(a == bld->one)
 158       return bld->zero;
 159    if(a == bld->zero)
 160       return bld->one;
 161
 162    if(type.norm && !type.floating && !type.fixed && !type.sign) {
 163       if(LLVMIsConstant(a))
 164          return LLVMConstNot(a);
 165       else
 166          return LLVMBuildNot(bld->builder, a, "");
 167    }
 168
 169    if(LLVMIsConstant(a))
 170       return LLVMConstSub(bld->one, a);
 171    else
 172       return LLVMBuildSub(bld->builder, bld->one, a, "");
 173 }
 174
 175
 176 LLVMValueRef
 177 lp_build_add(struct lp_build_context *bld,
 178              LLVMValueRef a,
 179              LLVMValueRef b)
 180 {
 181    const union lp_type type = bld->type;
 182    LLVMValueRef res;
 183
 184    if(a == bld->zero)
 185       return b;
 186    if(b == bld->zero)
 187       return a;
 188    if(a == bld->undef || b == bld->undef)
 189       return bld->undef;
 190
 191    if(bld->type.norm) {
 192       const char *intrinsic = NULL;
 193
 194       if(a == bld->one || b == bld->one)
 195         return bld->one;
 196
 197 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 198       if(type.width * type.length == 128 &&
 199          !type.floating && !type.fixed) {
 200          if(type.width == 8)
 201             intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
 202          if(type.width == 16)
 203             intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
 204       }
 205 #endif
 206
 207       if(intrinsic)
 208          return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
 209    }
 210
 211    if(LLVMIsConstant(a) && LLVMIsConstant(b))
 212       res = LLVMConstAdd(a, b);
 213    else
 214       res = LLVMBuildAdd(bld->builder, a, b, "");
 215
 216    if(bld->type.norm && (bld->type.floating || bld->type.fixed))
 217       res = lp_build_min_simple(bld, res, bld->one);
 218
 219    return res;
 220 }
 221
 222
 223 LLVMValueRef
 224 lp_build_sub(struct lp_build_context *bld,
 225              LLVMValueRef a,
 226              LLVMValueRef b)
 227 {
 228    const union lp_type type = bld->type;
 229    LLVMValueRef res;
 230
 231    if(b == bld->zero)
 232       return a;
 233    if(a == bld->undef || b == bld->undef)
 234       return bld->undef;
 235    if(a == b)
 236       return bld->zero;
 237
 238    if(bld->type.norm) {
 239       const char *intrinsic = NULL;
 240
 241       if(b == bld->one)
 242         return bld->zero;
 243
 244 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 245       if(type.width * type.length == 128 &&
 246          !type.floating && !type.fixed) {
 247          if(type.width == 8)
 248             intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
 249          if(type.width == 16)
 250             intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
 251       }
 252 #endif
 253
 254       if(intrinsic)
 255          return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
 256    }
 257
 258    if(LLVMIsConstant(a) && LLVMIsConstant(b))
 259       res = LLVMConstSub(a, b);
 260    else
 261       res = LLVMBuildSub(bld->builder, a, b, "");
 262
 263    if(bld->type.norm && (bld->type.floating || bld->type.fixed))
 264       res = lp_build_max_simple(bld, res, bld->zero);
 265
 266    return res;
 267 }
 268
 269
 270 /**
 271  * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions.
 272  */
 273 static LLVMValueRef
 274 lp_build_unpack_shuffle(unsigned n, unsigned lo_hi)
 275 {
 276    LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
 277    unsigned i, j;
 278
 279    assert(n <= LP_MAX_VECTOR_LENGTH);
 280    assert(lo_hi < 2);
 281
 282    for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) {
 283       elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0);
 284       elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0);
 285    }
 286
 287    return LLVMConstVector(elems, n);
 288 }
 289
 290
 291 static LLVMValueRef
 292 lp_build_const_vec(LLVMTypeRef type, unsigned n, long long c)
 293 {
 294    LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
 295    unsigned i;
 296
 297    assert(n <= LP_MAX_VECTOR_LENGTH);
 298
 299    for(i = 0; i < n; ++i)
 300       elems[i] = LLVMConstInt(type, c, 0);
 301
 302    return LLVMConstVector(elems, n);
 303 }
 304
 305
 306 /**
 307  * Normalized 8bit multiplication.
 308  *
 309  * - alpha plus one
 310  *
 311  *     makes the following approximation to the division (Sree)
 312  *
 313  *       a*b/255 ~= (a*(b + 1)) >> 256
 314  *
 315  *     which is the fastest method that satisfies the following OpenGL criteria
 316  *
 317  *       0*0 = 0 and 255*255 = 255
 318  *
 319  * - geometric series
 320  *
 321  *     takes the geometric series approximation to the division
 322  *
 323  *       t/255 = (t >> 8) + (t >> 16) + (t >> 24) ..
 324  *
 325  *     in this case just the first two terms to fit in 16bit arithmetic
 326  *
 327  *       t/255 ~= (t + (t >> 8)) >> 8
 328  *
 329  *     note that just by itself it doesn't satisfies the OpenGL criteria, as
 330  *     255*255 = 254, so the special case b = 255 must be accounted or roundoff
 331  *     must be used
 332  *
 333  * - geometric series plus rounding
 334  *
 335  *     when using a geometric series division instead of truncating the result
 336  *     use roundoff in the approximation (Jim Blinn)
 337  *
 338  *       t/255 ~= (t + (t >> 8) + 0x80) >> 8
 339  *
 340  *     achieving the exact results
 341  *
 342  * @sa Alvy Ray Smith, Image Compositing Fundamentals, Tech Memo 4, Aug 15, 1995,
 343  *     ftp://ftp.alvyray.com/Acrobat/4_Comp.pdf
 344  * @sa Michael Herf, The "double blend trick", May 2000,
 345  *     http://www.stereopsis.com/doubleblend.html
 346  */
 347 static LLVMValueRef
 348 lp_build_mul_u8n(LLVMBuilderRef builder,
 349                  LLVMValueRef a, LLVMValueRef b)
 350 {
 351    static LLVMValueRef c01 = NULL;
 352    static LLVMValueRef c08 = NULL;
 353    static LLVMValueRef c80 = NULL;
 354    LLVMValueRef ab;
 355
 356    if(!c01) c01 = lp_build_const_vec(LLVMInt16Type(), 8, 0x01);
 357    if(!c08) c08 = lp_build_const_vec(LLVMInt16Type(), 8, 0x08);
 358    if(!c80) c80 = lp_build_const_vec(LLVMInt16Type(), 8, 0x80);
 359
 360 #if 0
 361
 362    /* a*b/255 ~= (a*(b + 1)) >> 256 */
 363    b = LLVMBuildAdd(builder, b, c01, "");
 364    ab = LLVMBuildMul(builder, a, b, "");
 365
 366 #else
 367
 368    /* t/255 ~= (t + (t >> 8) + 0x80) >> 8 */
 369    ab = LLVMBuildMul(builder, a, b, "");
 370    ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c08, ""), "");
 371    ab = LLVMBuildAdd(builder, ab, c80, "");
 372
 373 #endif
 374
 375    ab = LLVMBuildLShr(builder, ab, c08, "");
 376
 377    return ab;
 378 }
 379
 380
 381 LLVMValueRef
 382 lp_build_mul(struct lp_build_context *bld,
 383              LLVMValueRef a,
 384              LLVMValueRef b)
 385 {
 386    const union lp_type type = bld->type;
 387
 388    if(a == bld->zero)
 389       return bld->zero;
 390    if(a == bld->one)
 391       return b;
 392    if(b == bld->zero)
 393       return bld->zero;
 394    if(b == bld->one)
 395       return a;
 396    if(a == bld->undef || b == bld->undef)
 397       return bld->undef;
 398
 399    if(!type.floating && !type.fixed && type.norm) {
 400 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 401       if(type.width == 8 && type.length == 16) {
 402          LLVMTypeRef i16x8 = LLVMVectorType(LLVMInt16Type(), 8);
 403          LLVMTypeRef i8x16 = LLVMVectorType(LLVMInt8Type(), 16);
 404          static LLVMValueRef ml = NULL;
 405          static LLVMValueRef mh = NULL;
 406          LLVMValueRef al, ah, bl, bh;
 407          LLVMValueRef abl, abh;
 408          LLVMValueRef ab;
 409
 410          if(!ml) ml = lp_build_unpack_shuffle(16, 0);
 411          if(!mh) mh = lp_build_unpack_shuffle(16, 1);
 412
 413          /*  PUNPCKLBW, PUNPCKHBW */
 414          al = LLVMBuildShuffleVector(bld->builder, a, bld->zero, ml, "");
 415          bl = LLVMBuildShuffleVector(bld->builder, b, bld->zero, ml, "");
 416          ah = LLVMBuildShuffleVector(bld->builder, a, bld->zero, mh, "");
 417          bh = LLVMBuildShuffleVector(bld->builder, b, bld->zero, mh, "");
 418
 419          /* NOP */
 420          al = LLVMBuildBitCast(bld->builder, al, i16x8, "");
 421          bl = LLVMBuildBitCast(bld->builder, bl, i16x8, "");
 422          ah = LLVMBuildBitCast(bld->builder, ah, i16x8, "");
 423          bh = LLVMBuildBitCast(bld->builder, bh, i16x8, "");
 424
 425          /* PMULLW, PSRLW, PADDW */
 426          abl = lp_build_mul_u8n(bld->builder, al, bl);
 427          abh = lp_build_mul_u8n(bld->builder, ah, bh);
 428
 429          /* PACKUSWB */
 430          ab = lp_build_intrinsic_binary(bld->builder, "llvm.x86.sse2.packuswb.128" , i16x8, abl, abh);
 431
 432          /* NOP */
 433          ab = LLVMBuildBitCast(bld->builder, ab, i8x16, "");
 434
 435          return ab;
 436       }
 437 #endif
 438
 439       /* FIXME */
 440       assert(0);
 441    }
 442
 443    if(LLVMIsConstant(a) && LLVMIsConstant(b))
 444       return LLVMConstMul(a, b);
 445
 446    return LLVMBuildMul(bld->builder, a, b, "");
 447 }
 448
 449
 450 LLVMValueRef
 451 lp_build_div(struct lp_build_context *bld,
 452              LLVMValueRef a,
 453              LLVMValueRef b)
 454 {
 455    const union lp_type type = bld->type;
 456
 457    if(a == bld->zero)
 458       return bld->zero;
 459    if(a == bld->one)
 460       return lp_build_rcp(bld, b);
 461    if(b == bld->zero)
 462       return bld->undef;
 463    if(b == bld->one)
 464       return a;
 465    if(a == bld->undef || b == bld->undef)
 466       return bld->undef;
 467
 468    if(LLVMIsConstant(a) && LLVMIsConstant(b))
 469       return LLVMConstFDiv(a, b);
 470
 471 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 472    if(type.width == 32 && type.length == 4)
 473       return lp_build_mul(bld, a, lp_build_rcp(bld, b));
 474 #endif
 475
 476    return LLVMBuildFDiv(bld->builder, a, b, "");
 477 }
 478
 479
 480 LLVMValueRef
 481 lp_build_min(struct lp_build_context *bld,
 482              LLVMValueRef a,
 483              LLVMValueRef b)
 484 {
 485    if(a == bld->undef || b == bld->undef)
 486       return bld->undef;
 487
 488    if(a == b)
 489       return a;
 490
 491    if(bld->type.norm) {
 492       if(a == bld->zero || b == bld->zero)
 493          return bld->zero;
 494       if(a == bld->one)
 495          return b;
 496       if(b == bld->one)
 497          return a;
 498    }
 499
 500    return lp_build_min_simple(bld, a, b);
 501 }
 502
 503
 504 LLVMValueRef
 505 lp_build_max(struct lp_build_context *bld,
 506              LLVMValueRef a,
 507              LLVMValueRef b)
 508 {
 509    if(a == bld->undef || b == bld->undef)
 510       return bld->undef;
 511
 512    if(a == b)
 513       return a;
 514
 515    if(bld->type.norm) {
 516       if(a == bld->one || b == bld->one)
 517          return bld->one;
 518       if(a == bld->zero)
 519          return b;
 520       if(b == bld->zero)
 521          return a;
 522    }
 523
 524    return lp_build_max_simple(bld, a, b);
 525 }
 526
 527
 528 LLVMValueRef
 529 lp_build_abs(struct lp_build_context *bld,
 530              LLVMValueRef a)
 531 {
 532    const union lp_type type = bld->type;
 533
 534    if(!type.sign)
 535       return a;
 536
 537    /* XXX: is this really necessary? */
 538 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 539    if(!type.floating && type.width*type.length == 128) {
 540       LLVMTypeRef vec_type = lp_build_vec_type(type);
 541       if(type.width == 8)
 542          return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a);
 543       if(type.width == 16)
 544          return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a);
 545       if(type.width == 32)
 546          return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a);
 547    }
 548 #endif
 549
 550    return lp_build_max(bld, a, LLVMBuildNeg(bld->builder, a, ""));
 551 }
 552
 553
 554 LLVMValueRef
 555 lp_build_sqrt(struct lp_build_context *bld,
 556               LLVMValueRef a)
 557 {
 558    const union lp_type type = bld->type;
 559    LLVMTypeRef vec_type = lp_build_vec_type(type);
 560    char intrinsic[32];
 561
 562    /* TODO: optimize the constant case */
 563    /* TODO: optimize the constant case */
 564
 565    assert(type.floating);
 566    util_snprintf(intrinsic, sizeof intrinsic, "llvm.sqrt.v%uf%u", type.length, type.width);
 567
 568    return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a);
 569 }
 570
 571
 572 LLVMValueRef
 573 lp_build_rcp(struct lp_build_context *bld,
 574              LLVMValueRef a)
 575 {
 576    const union lp_type type = bld->type;
 577
 578    if(a == bld->zero)
 579       return bld->undef;
 580    if(a == bld->one)
 581       return bld->one;
 582    if(a == bld->undef)
 583       return bld->undef;
 584
 585    assert(type.floating);
 586
 587    if(LLVMIsConstant(a))
 588       return LLVMConstFDiv(bld->one, a);
 589
 590    /* XXX: is this really necessary? */
 591 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 592    if(type.width == 32 && type.length == 4)
 593       return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
 594 #endif
 595
 596    return LLVMBuildFDiv(bld->builder, bld->one, a, "");
 597 }
 598
 599
 600 LLVMValueRef
 601 lp_build_rsqrt(struct lp_build_context *bld,
 602                LLVMValueRef a)
 603 {
 604    const union lp_type type = bld->type;
 605
 606    assert(type.floating);
 607
 608    /* XXX: is this really necessary? */
 609 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 610    if(type.width == 32 && type.length == 4)
 611       return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", lp_build_vec_type(type), a);
 612 #endif
 613
 614    return lp_build_rcp(bld, lp_build_sqrt(bld, a));
 615 }
 616
 617
 618 LLVMValueRef
 619 lp_build_cos(struct lp_build_context *bld,
 620               LLVMValueRef a)
 621 {
 622    const union lp_type type = bld->type;
 623    LLVMTypeRef vec_type = lp_build_vec_type(type);
 624    char intrinsic[32];
 625
 626    /* TODO: optimize the constant case */
 627
 628    assert(type.floating);
 629    util_snprintf(intrinsic, sizeof intrinsic, "llvm.cos.v%uf%u", type.length, type.width);
 630
 631    return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a);
 632 }
 633
 634
 635 LLVMValueRef
 636 lp_build_sin(struct lp_build_context *bld,
 637               LLVMValueRef a)
 638 {
 639    const union lp_type type = bld->type;
 640    LLVMTypeRef vec_type = lp_build_vec_type(type);
 641    char intrinsic[32];
 642
 643    /* TODO: optimize the constant case */
 644
 645    assert(type.floating);
 646    util_snprintf(intrinsic, sizeof intrinsic, "llvm.sin.v%uf%u", type.length, type.width);
 647
 648    return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a);
 649 }
 650
 651
 652 LLVMValueRef
 653 lp_build_pow(struct lp_build_context *bld,
 654              LLVMValueRef a,
 655              LLVMValueRef b)
 656 {
 657    const union lp_type type = bld->type;
 658    LLVMTypeRef vec_type = lp_build_vec_type(type);
 659    char intrinsic[32];
 660
 661    /* TODO: optimize the constant case */
 662
 663    assert(type.floating);
 664    util_snprintf(intrinsic, sizeof intrinsic, "llvm.pow.v%uf%u", type.length, type.width);
 665
 666    return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a, b);
 667 }
 668
 669
 670 LLVMValueRef
 671 lp_build_exp(struct lp_build_context *bld,
 672              LLVMValueRef a)
 673 {
 674    /* FIXME: optimize */
 675    return lp_build_pow(bld, lp_build_const_uni(bld->type, 2.7182818284590452354), a);
 676 }
 677
 678
 679 LLVMValueRef
 680 lp_build_log(struct lp_build_context *bld,
 681              LLVMValueRef a)
 682 {
 683    /* FIXME: implement */
 684    return bld->undef;
 685 }
 686
 687
 688 #define EXP_POLY_DEGREE 3
 689 #define LOG_POLY_DEGREE 5
 690
 691
 692 static LLVMValueRef
 693 lp_build_polynomial(struct lp_build_context *bld,
 694                     LLVMValueRef x,
 695                     const double *coeffs,
 696                     unsigned num_coeffs)
 697 {
 698    const union lp_type type = bld->type;
 699    LLVMValueRef res = NULL;
 700    unsigned i;
 701
 702    for (i = num_coeffs; i--; ) {
 703       LLVMValueRef coeff = lp_build_const_uni(type, coeffs[i]);
 704       if(res)
 705          res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res));
 706       else
 707          res = coeff;
 708    }
 709
 710    if(res)
 711       return res;
 712    else
 713       return bld->undef;
 714 }
 715
 716
 717 LLVMValueRef
 718 lp_build_exp2(struct lp_build_context *bld,
 719               LLVMValueRef a)
 720 {
 721    /* FIXME: optimize */
 722    return lp_build_pow(bld, lp_build_const_uni(bld->type, 2.0), a);
 723 }
 724
 725
 726 /**
 727  * See http://www.devmaster.net/forums/showthread.php?p=43580
 728  */
 729 LLVMValueRef
 730 lp_build_log2(struct lp_build_context *bld,
 731               LLVMValueRef x)
 732 {
 733    const union lp_type type = bld->type;
 734    LLVMTypeRef vec_type = lp_build_vec_type(type);
 735    LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
 736
 737    LLVMValueRef expmask = lp_build_int_const_uni(type, 0x7f800000);
 738    LLVMValueRef mantmask = lp_build_int_const_uni(type, 0x007fffff);
 739    LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type);
 740
 741    LLVMValueRef i = LLVMBuildBitCast(bld->builder, x, int_vec_type, "");
 742
 743    LLVMValueRef exp;
 744    LLVMValueRef mant;
 745    LLVMValueRef logmant;
 746
 747    /* exp = (float) exponent(x) */
 748    exp = LLVMBuildAnd(bld->builder, i, expmask, "");
 749    exp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_uni(type, 23), "");
 750    exp = LLVMBuildSub(bld->builder, exp, lp_build_int_const_uni(type, 127), "");
 751    exp = LLVMBuildSIToFP(bld->builder, exp, vec_type, "");
 752
 753    /* mant = (float) mantissa(x) */
 754    mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
 755    mant = LLVMBuildOr(bld->builder, mant, one, "");
 756    mant = LLVMBuildSIToFP(bld->builder, mant, vec_type, "");
 757
 758    /* Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[
 759     * These coefficients can be generate with
 760     * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html
 761     */
 762    const double polynomial[] = {
 763 #if LOG_POLY_DEGREE == 6
 764       3.11578814719469302614, -3.32419399085241980044, 2.59883907202499966007, -1.23152682416275988241, 0.318212422185251071475, -0.0344359067839062357313
 765 #elif LOG_POLY_DEGREE == 5
 766       2.8882704548164776201, -2.52074962577807006663, 1.48116647521213171641, -0.465725644288844778798, 0.0596515482674574969533
 767 #elif LOG_POLY_DEGREE == 4
 768       2.61761038894603480148, -1.75647175389045657003, 0.688243882994381274313, -0.107254423828329604454
 769 #elif LOG_POLY_DEGREE == 3
 770       2.28330284476918490682, -1.04913055217340124191, 0.204446009836232697516
 771 #else
 772 #error
 773 #endif
 774    };
 775
 776    logmant = lp_build_polynomial(bld, mant, polynomial, sizeof(polynomial)/sizeof(polynomial[0]));
 777
 778    /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
 779    logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildMul(bld->builder, mant, bld->one, ""), "");
 780
 781    return LLVMBuildAdd(bld->builder, logmant, exp, "");
 782 }