src/gallium/drivers/llvmpipe/lp_bld_arit.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28
  29 /**
  30  * @file
  31  * Helper
  32  *
  33  * LLVM IR doesn't support all basic arithmetic operations we care about (most
  34  * notably min/max and saturated operations), and it is often necessary to
  35  * resort to machine-specific intrinsics directly. The functions here hide all
  36  * these implementation details from the other modules.
  37  *
  38  * We also do simple expression simplification here. Reasons are:
  39  * - it is very easy given we have all necessary information readily available
  40  * - LLVM optimization passes fail to simplify several vector expressions
  41  * - We often know value constraints which the optimization passes have no way
  42  *   of knowing, such as when source arguments are known to be in [0, 1] range.
  43  *
  44  * @author Jose Fonseca <jfonseca@vmware.com>
  45  */
  46
  47
  48 #include "pipe/p_state.h"
  49
  50 #include "lp_bld_arit.h"
  51
  52
  53 LLVMTypeRef
  54 lp_build_elem_type(union lp_type type)
  55 {
  56    if (type.floating) {
  57       assert(type.sign);
  58       switch(type.width) {
  59          case 32:
  60          return LLVMFloatType();
  61          break;
  62       case 64:
  63          return LLVMDoubleType();
  64          break;
  65       default:
  66          assert(0);
  67          return LLVMFloatType();
  68       }
  69    }
  70    else {
  71       return LLVMIntType(type.width);
  72    }
  73 }
  74
  75
  76 LLVMTypeRef
  77 lp_build_vec_type(union lp_type type)
  78 {
  79    LLVMTypeRef elem_type = lp_build_elem_type(type);
  80    return LLVMVectorType(elem_type, type.length);
  81 }
  82
  83
  84 /**
  85  * This function is a mirror of lp_build_elem_type() above.
  86  *
  87  * XXX: I'm not sure if it wouldn't be easier/efficient to just recreate the
  88  * type and check for identity.
  89  */
  90 boolean
  91 lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type)
  92 {
  93    LLVMTypeKind elem_kind;
  94
  95    assert(elem_type);
  96    if(!elem_type)
  97       return FALSE;
  98
  99    elem_kind = LLVMGetTypeKind(elem_type);
 100
 101    if (type.floating) {
 102       switch(type.width) {
 103       case 32:
 104          if(elem_kind != LLVMFloatTypeKind)
 105             return FALSE;
 106          break;
 107       case 64:
 108          if(elem_kind != LLVMDoubleTypeKind)
 109             return FALSE;
 110          break;
 111       default:
 112          assert(0);
 113          return FALSE;
 114       }
 115    }
 116    else {
 117       if(elem_kind != LLVMIntegerTypeKind)
 118          return FALSE;
 119
 120       if(LLVMGetIntTypeWidth(elem_type) != type.width)
 121          return FALSE;
 122    }
 123
 124    return TRUE;
 125 }
 126
 127
 128 boolean
 129 lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type)
 130 {
 131    LLVMTypeRef elem_type;
 132
 133    assert(vec_type);
 134    if(!vec_type)
 135       return FALSE;
 136
 137    if(LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind)
 138       return FALSE;
 139
 140    if(LLVMGetVectorSize(vec_type) != type.length)
 141       return FALSE;
 142
 143    elem_type = LLVMGetElementType(vec_type);
 144
 145    return lp_check_elem_type(type, elem_type);
 146 }
 147
 148
 149 boolean
 150 lp_check_value(union lp_type type, LLVMValueRef val)
 151 {
 152    LLVMTypeRef vec_type;
 153
 154    assert(val);
 155    if(!val)
 156       return FALSE;
 157
 158    vec_type = LLVMTypeOf(val);
 159
 160    return lp_check_vec_type(type, vec_type);
 161 }
 162
 163
 164 LLVMValueRef
 165 lp_build_undef(union lp_type type)
 166 {
 167    LLVMTypeRef vec_type = lp_build_vec_type(type);
 168    return LLVMGetUndef(vec_type);
 169 }
 170
 171
 172 LLVMValueRef
 173 lp_build_zero(union lp_type type)
 174 {
 175    LLVMTypeRef vec_type = lp_build_vec_type(type);
 176    return LLVMConstNull(vec_type);
 177 }
 178
 179
 180 LLVMValueRef
 181 lp_build_one(union lp_type type)
 182 {
 183    LLVMTypeRef elem_type;
 184    LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
 185    unsigned i;
 186
 187    assert(type.length <= LP_MAX_VECTOR_LENGTH);
 188
 189    elem_type = lp_build_elem_type(type);
 190
 191    if(type.floating)
 192       elems[0] = LLVMConstReal(elem_type, 1.0);
 193    else if(type.fixed)
 194       elems[0] = LLVMConstInt(elem_type, 1LL << (type.width/2), 0);
 195    else if(!type.norm)
 196       elems[0] = LLVMConstInt(elem_type, 1, 0);
 197    else {
 198       /* special case' -- 1.0 for normalized types is more easily attained if
 199        * we start with a vector consisting of all bits set */
 200       LLVMTypeRef vec_type = LLVMVectorType(elem_type, type.length);
 201       LLVMValueRef vec = LLVMConstAllOnes(vec_type);
 202
 203       if(type.sign)
 204          vec = LLVMConstLShr(vec, LLVMConstInt(LLVMInt32Type(), 1, 0));
 205
 206       return vec;
 207    }
 208
 209    for(i = 1; i < type.length; ++i)
 210       elems[i] = elems[0];
 211
 212    return LLVMConstVector(elems, type.length);
 213 }
 214
 215
 216 LLVMValueRef
 217 lp_build_const_aos(union lp_type type,
 218                    double r, double g, double b, double a,
 219                    const unsigned char *swizzle)
 220 {
 221    const unsigned char default_swizzle[4] = {0, 1, 2, 3};
 222    LLVMTypeRef elem_type;
 223    LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
 224    unsigned i;
 225
 226    assert(type.length % 4 == 0);
 227    assert(type.length <= LP_MAX_VECTOR_LENGTH);
 228
 229    elem_type = lp_build_elem_type(type);
 230
 231    if(swizzle == NULL)
 232       swizzle = default_swizzle;
 233
 234    if(type.floating) {
 235       elems[swizzle[0]] = LLVMConstReal(elem_type, r);
 236       elems[swizzle[1]] = LLVMConstReal(elem_type, g);
 237       elems[swizzle[2]] = LLVMConstReal(elem_type, b);
 238       elems[swizzle[3]] = LLVMConstReal(elem_type, a);
 239    }
 240    else {
 241       unsigned shift;
 242       long long llscale;
 243       double dscale;
 244
 245       if(type.fixed)
 246          shift = type.width/2;
 247       else if(type.norm)
 248          shift = type.sign ? type.width - 1 : type.width;
 249       else
 250          shift = 0;
 251
 252       llscale = (long long)1 << shift;
 253       dscale = (double)llscale;
 254       assert((long long)dscale == llscale);
 255
 256       elems[swizzle[0]] = LLVMConstInt(elem_type, r*dscale + 0.5, 0);
 257       elems[swizzle[1]] = LLVMConstInt(elem_type, g*dscale + 0.5, 0);
 258       elems[swizzle[2]] = LLVMConstInt(elem_type, b*dscale + 0.5, 0);
 259       elems[swizzle[3]] = LLVMConstInt(elem_type, a*dscale + 0.5, 0);
 260    }
 261
 262    for(i = 4; i < type.length; ++i)
 263       elems[i] = elems[i % 4];
 264
 265    return LLVMConstVector(elems, type.length);
 266 }
 267
 268
 269 static LLVMValueRef
 270 lp_build_intrinsic_binary(LLVMBuilderRef builder,
 271                           const char *name,
 272                           LLVMValueRef a,
 273                           LLVMValueRef b)
 274 {
 275    LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
 276    LLVMValueRef function;
 277    LLVMValueRef args[2];
 278
 279    function = LLVMGetNamedFunction(module, name);
 280    if(!function) {
 281       LLVMTypeRef type = LLVMTypeOf(a);
 282       LLVMTypeRef arg_types[2];
 283       arg_types[0] = type;
 284       arg_types[1] = type;
 285       function = LLVMAddFunction(module, name, LLVMFunctionType(type, arg_types, 2, 0));
 286       LLVMSetFunctionCallConv(function, LLVMCCallConv);
 287       LLVMSetLinkage(function, LLVMExternalLinkage);
 288    }
 289    assert(LLVMIsDeclaration(function));
 290
 291 #ifdef DEBUG
 292    /* We shouldn't use only constants with intrinsics, as they won't be
 293     * propagated by LLVM optimization passes.
 294     */
 295    if(LLVMIsConstant(a) && LLVMIsConstant(b))
 296       debug_printf("warning: invoking intrinsic \"%s\" with constants\n");
 297 #endif
 298
 299    args[0] = a;
 300    args[1] = b;
 301
 302    return LLVMBuildCall(builder, function, args, 2, "");
 303 }
 304
 305
 306 static LLVMValueRef
 307 lp_build_min_simple(struct lp_build_context *bld,
 308                     LLVMValueRef a,
 309                     LLVMValueRef b)
 310 {
 311    const union lp_type type = bld->type;
 312    const char *intrinsic = NULL;
 313    LLVMValueRef cond;
 314
 315    /* TODO: optimize the constant case */
 316
 317 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 318    if(type.width * type.length == 128) {
 319       if(type.floating)
 320          if(type.width == 32)
 321             intrinsic = "llvm.x86.sse.min.ps";
 322          if(type.width == 64)
 323             intrinsic = "llvm.x86.sse2.min.pd";
 324       else {
 325          if(type.width == 8 && !type.sign)
 326             intrinsic = "llvm.x86.sse2.pminu.b";
 327          if(type.width == 16 && type.sign)
 328             intrinsic = "llvm.x86.sse2.pmins.w";
 329       }
 330    }
 331 #endif
 332
 333    if(intrinsic)
 334       return lp_build_intrinsic_binary(bld->builder, intrinsic, a, b);
 335
 336    if(type.floating)
 337       cond = LLVMBuildFCmp(bld->builder, LLVMRealULT, a, b, "");
 338    else
 339       cond = LLVMBuildICmp(bld->builder, type.sign ? LLVMIntSLT : LLVMIntULT, a, b, "");
 340    return LLVMBuildSelect(bld->builder, cond, a, b, "");
 341 }
 342
 343
 344 static LLVMValueRef
 345 lp_build_max_simple(struct lp_build_context *bld,
 346                     LLVMValueRef a,
 347                     LLVMValueRef b)
 348 {
 349    const union lp_type type = bld->type;
 350    const char *intrinsic = NULL;
 351    LLVMValueRef cond;
 352
 353    /* TODO: optimize the constant case */
 354
 355 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 356    if(type.width * type.length == 128) {
 357       if(type.floating)
 358          if(type.width == 32)
 359             intrinsic = "llvm.x86.sse.max.ps";
 360          if(type.width == 64)
 361             intrinsic = "llvm.x86.sse2.max.pd";
 362       else {
 363          if(type.width == 8 && !type.sign)
 364             intrinsic = "llvm.x86.sse2.pmaxu.b";
 365          if(type.width == 16 && type.sign)
 366             intrinsic = "llvm.x86.sse2.pmaxs.w";
 367       }
 368    }
 369 #endif
 370
 371    if(intrinsic)
 372       return lp_build_intrinsic_binary(bld->builder, intrinsic, a, b);
 373
 374    if(type.floating)
 375       cond = LLVMBuildFCmp(bld->builder, LLVMRealULT, a, b, "");
 376    else
 377       cond = LLVMBuildICmp(bld->builder, type.sign ? LLVMIntSLT : LLVMIntULT, a, b, "");
 378    return LLVMBuildSelect(bld->builder, cond, b, a, "");
 379 }
 380
 381
 382 LLVMValueRef
 383 lp_build_comp(struct lp_build_context *bld,
 384               LLVMValueRef a)
 385 {
 386    const union lp_type type = bld->type;
 387
 388    if(a == bld->one)
 389       return bld->zero;
 390    if(a == bld->zero)
 391       return bld->one;
 392
 393    if(type.norm && !type.floating && !type.fixed && !type.sign) {
 394       if(LLVMIsConstant(a))
 395          return LLVMConstNot(a);
 396       else
 397          return LLVMBuildNot(bld->builder, a, "");
 398    }
 399
 400    if(LLVMIsConstant(a))
 401       return LLVMConstSub(bld->one, a);
 402    else
 403       return LLVMBuildSub(bld->builder, bld->one, a, "");
 404 }
 405
 406
 407 LLVMValueRef
 408 lp_build_add(struct lp_build_context *bld,
 409              LLVMValueRef a,
 410              LLVMValueRef b)
 411 {
 412    const union lp_type type = bld->type;
 413    LLVMValueRef res;
 414
 415    if(a == bld->zero)
 416       return b;
 417    if(b == bld->zero)
 418       return a;
 419    if(a == bld->undef || b == bld->undef)
 420       return bld->undef;
 421
 422    if(bld->type.norm) {
 423       const char *intrinsic = NULL;
 424
 425       if(a == bld->one || b == bld->one)
 426         return bld->one;
 427
 428 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 429       if(type.width * type.length == 128 &&
 430          !type.floating && !type.fixed) {
 431          if(type.width == 8)
 432             intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
 433          if(type.width == 16)
 434             intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
 435       }
 436 #endif
 437
 438       if(intrinsic)
 439          return lp_build_intrinsic_binary(bld->builder, intrinsic, a, b);
 440    }
 441
 442    if(LLVMIsConstant(a) && LLVMIsConstant(b))
 443       res = LLVMConstAdd(a, b);
 444    else
 445       res = LLVMBuildAdd(bld->builder, a, b, "");
 446
 447    if(bld->type.norm && (bld->type.floating || bld->type.fixed))
 448       res = lp_build_min_simple(bld, res, bld->one);
 449
 450    return res;
 451 }
 452
 453
 454 LLVMValueRef
 455 lp_build_sub(struct lp_build_context *bld,
 456              LLVMValueRef a,
 457              LLVMValueRef b)
 458 {
 459    const union lp_type type = bld->type;
 460    LLVMValueRef res;
 461
 462    if(b == bld->zero)
 463       return a;
 464    if(a == bld->undef || b == bld->undef)
 465       return bld->undef;
 466    if(a == b)
 467       return bld->zero;
 468
 469    if(bld->type.norm) {
 470       const char *intrinsic = NULL;
 471
 472       if(b == bld->one)
 473         return bld->zero;
 474
 475 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 476       if(type.width * type.length == 128 &&
 477          !type.floating && !type.fixed) {
 478          if(type.width == 8)
 479             intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
 480          if(type.width == 16)
 481             intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
 482       }
 483 #endif
 484
 485       if(intrinsic)
 486          return lp_build_intrinsic_binary(bld->builder, intrinsic, a, b);
 487    }
 488
 489    if(LLVMIsConstant(a) && LLVMIsConstant(b))
 490       res = LLVMConstSub(a, b);
 491    else
 492       res = LLVMBuildSub(bld->builder, a, b, "");
 493
 494    if(bld->type.norm && (bld->type.floating || bld->type.fixed))
 495       res = lp_build_max_simple(bld, res, bld->zero);
 496
 497    return res;
 498 }
 499
 500
 501 /**
 502  * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions.
 503  */
 504 static LLVMValueRef
 505 lp_build_unpack_shuffle(unsigned n, unsigned lo_hi)
 506 {
 507    LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
 508    unsigned i, j;
 509
 510    assert(n <= LP_MAX_VECTOR_LENGTH);
 511    assert(lo_hi < 2);
 512
 513    for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) {
 514       elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0);
 515       elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0);
 516    }
 517
 518    return LLVMConstVector(elems, n);
 519 }
 520
 521
 522 static LLVMValueRef
 523 lp_build_const_vec(LLVMTypeRef type, unsigned n, long long c)
 524 {
 525    LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
 526    unsigned i;
 527
 528    assert(n <= LP_MAX_VECTOR_LENGTH);
 529
 530    for(i = 0; i < n; ++i)
 531       elems[i] = LLVMConstInt(type, c, 0);
 532
 533    return LLVMConstVector(elems, n);
 534 }
 535
 536
 537 /**
 538  * Normalized 8bit multiplication.
 539  *
 540  * - alpha plus one
 541  *
 542  *     makes the following approximation to the division (Sree)
 543  *
 544  *       a*b/255 ~= (a*(b + 1)) >> 8
 545  *
 546  *     which is the fastest method that satisfies the following OpenGL criteria
 547  *
 548  *       0*0 = 0 and 255*255 = 255
 549  *
 550  * - geometric series
 551  *
 552  *     takes the geometric series approximation to the division
 553  *
 554  *       t/255 = (t >> 8) + (t >> 16) + (t >> 24) ..
 555  *
 556  *     in this case just the first two terms to fit in 16bit arithmetic
 557  *
 558  *       t/255 ~= (t + (t >> 8)) >> 8
 559  *
 560  *     note that just by itself it doesn't satisfy the OpenGL criteria, as
 561  *     255*255 = 254, so the special case b = 255 must be accounted for or
 562  *     roundoff must be used
 563  *
 564  * - geometric series plus rounding
 565  *
 566  *     when using a geometric series division instead of truncating the result
 567  *     use roundoff in the approximation (Jim Blinn)
 568  *
 569  *       t/255 ~= (t + (t >> 8) + 0x80) >> 8
 570  *
 571  *     achieving the exact results
 572  *
 573  * @sa Alvy Ray Smith, Image Compositing Fundamentals, Tech Memo 4, Aug 15, 1995,
 574  *     ftp://ftp.alvyray.com/Acrobat/4_Comp.pdf
 575  * @sa Michael Herf, The "double blend trick", May 2000,
 576  *     http://www.stereopsis.com/doubleblend.html
 577  */
 578 static LLVMValueRef
 579 lp_build_mul_u8n(LLVMBuilderRef builder,
 580                  LLVMValueRef a, LLVMValueRef b)
 581 {
 582    static LLVMValueRef c01 = NULL;
 583    static LLVMValueRef c08 = NULL;
 584    static LLVMValueRef c80 = NULL;
 585    LLVMValueRef ab;
 586
 587    if(!c01) c01 = lp_build_const_vec(LLVMInt16Type(), 8, 0x01);
 588    if(!c08) c08 = lp_build_const_vec(LLVMInt16Type(), 8, 0x08);
 589    if(!c80) c80 = lp_build_const_vec(LLVMInt16Type(), 8, 0x80);
 590
 591 #if 0
 592
 593    /* a*b/255 ~= (a*(b + 1)) >> 256 */
 594    b = LLVMBuildAdd(builder, b, c01, "");
 595    ab = LLVMBuildMul(builder, a, b, "");
 596
 597 #else
 598
 599    /* t/255 ~= (t + (t >> 8) + 0x80) >> 8 */
 600    ab = LLVMBuildMul(builder, a, b, "");
 601    ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c08, ""), "");
 602    ab = LLVMBuildAdd(builder, ab, c80, "");
 603
 604 #endif
 605
 606    ab = LLVMBuildLShr(builder, ab, c08, "");
 607
 608    return ab;
 609 }
 610
 611
 612 LLVMValueRef
 613 lp_build_mul(struct lp_build_context *bld,
 614              LLVMValueRef a,
 615              LLVMValueRef b)
 616 {
 617    const union lp_type type = bld->type;
 618
 619    if(a == bld->zero)
 620       return bld->zero;
 621    if(a == bld->one)
 622       return b;
 623    if(b == bld->zero)
 624       return bld->zero;
 625    if(b == bld->one)
 626       return a;
 627    if(a == bld->undef || b == bld->undef)
 628       return bld->undef;
 629
 630    if(!type.floating && !type.fixed && type.norm) {
 631 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 632       if(type.width == 8 && type.length == 16) {
 633          LLVMTypeRef i16x8 = LLVMVectorType(LLVMInt16Type(), 8);
 634          LLVMTypeRef i8x16 = LLVMVectorType(LLVMInt8Type(), 16);
 635          static LLVMValueRef ml = NULL;
 636          static LLVMValueRef mh = NULL;
 637          LLVMValueRef al, ah, bl, bh;
 638          LLVMValueRef abl, abh;
 639          LLVMValueRef ab;
 640
 641          if(!ml) ml = lp_build_unpack_shuffle(16, 0);
 642          if(!mh) mh = lp_build_unpack_shuffle(16, 1);
 643
 644          /*  PUNPCKLBW, PUNPCKHBW */
 645          al = LLVMBuildShuffleVector(bld->builder, a, bld->zero, ml, "");
 646          bl = LLVMBuildShuffleVector(bld->builder, b, bld->zero, ml, "");
 647          ah = LLVMBuildShuffleVector(bld->builder, a, bld->zero, mh, "");
 648          bh = LLVMBuildShuffleVector(bld->builder, b, bld->zero, mh, "");
 649
 650          /* NOP */
 651          al = LLVMBuildBitCast(bld->builder, al, i16x8, "");
 652          bl = LLVMBuildBitCast(bld->builder, bl, i16x8, "");
 653          ah = LLVMBuildBitCast(bld->builder, ah, i16x8, "");
 654          bh = LLVMBuildBitCast(bld->builder, bh, i16x8, "");
 655
 656          /* PMULLW, PSRLW, PADDW */
 657          abl = lp_build_mul_u8n(bld->builder, al, bl);
 658          abh = lp_build_mul_u8n(bld->builder, ah, bh);
 659
 660          /* PACKUSWB */
 661          ab = lp_build_intrinsic_binary(bld->builder, "llvm.x86.sse2.packuswb.128" , abl, abh);
 662
 663          /* NOP */
 664          ab = LLVMBuildBitCast(bld->builder, ab, i8x16, "");
 665
 666          return ab;
 667       }
 668 #endif
 669
 670       /* FIXME */
 671       assert(0);
 672    }
 673
 674    if(LLVMIsConstant(a) && LLVMIsConstant(b))
 675       return LLVMConstMul(a, b);
 676
 677    return LLVMBuildMul(bld->builder, a, b, "");
 678 }
 679
 680
 681 LLVMValueRef
 682 lp_build_min(struct lp_build_context *bld,
 683              LLVMValueRef a,
 684              LLVMValueRef b)
 685 {
 686    if(a == bld->undef || b == bld->undef)
 687       return bld->undef;
 688
 689    if(a == b)
 690       return a;
 691
 692    if(bld->type.norm) {
 693       if(a == bld->zero || b == bld->zero)
 694          return bld->zero;
 695       if(a == bld->one)
 696          return b;
 697       if(b == bld->one)
 698          return a;
 699    }
 700
 701    return lp_build_min_simple(bld, a, b);
 702 }
 703
 704
 705 LLVMValueRef
 706 lp_build_max(struct lp_build_context *bld,
 707              LLVMValueRef a,
 708              LLVMValueRef b)
 709 {
 710    if(a == bld->undef || b == bld->undef)
 711       return bld->undef;
 712
 713    if(a == b)
 714       return a;
 715
 716    if(bld->type.norm) {
 717       if(a == bld->one || b == bld->one)
 718          return bld->one;
 719       if(a == bld->zero)
 720          return b;
 721       if(b == bld->zero)
 722          return a;
 723    }
 724
 725    return lp_build_max_simple(bld, a, b);
 726 }