src/gallium/auxiliary/gallivm/lp_bld_logic.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * Helper functions for logical operations.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  */
  34
  35
  36 #include "util/u_cpu_detect.h"
  37 #include "util/u_memory.h"
  38 #include "util/u_debug.h"
  39
  40 #include "lp_bld_type.h"
  41 #include "lp_bld_const.h"
  42 #include "lp_bld_init.h"
  43 #include "lp_bld_intr.h"
  44 #include "lp_bld_debug.h"
  45 #include "lp_bld_logic.h"
  46
  47
  48 /*
  49  * XXX
  50  *
  51  * Selection with vector conditional like
  52  *
  53  *    select <4 x i1> %C, %A, %B
  54  *
  55  * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is only
  56  * supported on some backends (x86) starting with llvm 3.1.
  57  *
  58  * Expanding the boolean vector to full SIMD register width, as in
  59  *
  60  *    sext <4 x i1> %C to <4 x i32>
  61  *
  62  * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
  63  * it causes assertion failures in LLVM 2.6. It appears to work correctly on
  64  * LLVM 2.7.
  65  */
  66
  67
  68 /**
  69  * Build code to compare two values 'a' and 'b' of 'type' using the given func.
  70  * \param func  one of PIPE_FUNC_x
  71  * If the ordered argument is true the function will use LLVM's ordered
  72  * comparisons, otherwise unordered comparisons will be used.
  73  * The result values will be 0 for false or ~0 for true.
  74  */
  75 static LLVMValueRef
  76 lp_build_compare_ext(struct gallivm_state *gallivm,
  77                      const struct lp_type type,
  78                      unsigned func,
  79                      LLVMValueRef a,
  80                      LLVMValueRef b,
  81                      boolean ordered)
  82 {
  83    LLVMBuilderRef builder = gallivm->builder;
  84    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
  85    LLVMValueRef zeros = LLVMConstNull(int_vec_type);
  86    LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
  87    LLVMValueRef cond;
  88    LLVMValueRef res;
  89
  90    assert(func >= PIPE_FUNC_NEVER);
  91    assert(func <= PIPE_FUNC_ALWAYS);
  92    assert(lp_check_value(type, a));
  93    assert(lp_check_value(type, b));
  94
  95    if(func == PIPE_FUNC_NEVER)
  96       return zeros;
  97    if(func == PIPE_FUNC_ALWAYS)
  98       return ones;
  99
 100    if(type.floating) {
 101       LLVMRealPredicate op;
 102       switch(func) {
 103       case PIPE_FUNC_EQUAL:
 104          op = ordered ? LLVMRealOEQ : LLVMRealUEQ;
 105          break;
 106       case PIPE_FUNC_NOTEQUAL:
 107          op = ordered ? LLVMRealONE : LLVMRealUNE;
 108          break;
 109       case PIPE_FUNC_LESS:
 110          op = ordered ? LLVMRealOLT : LLVMRealULT;
 111          break;
 112       case PIPE_FUNC_LEQUAL:
 113          op = ordered ? LLVMRealOLE : LLVMRealULE;
 114          break;
 115       case PIPE_FUNC_GREATER:
 116          op = ordered ? LLVMRealOGT : LLVMRealUGT;
 117          break;
 118       case PIPE_FUNC_GEQUAL:
 119          op = ordered ? LLVMRealOGE : LLVMRealUGE;
 120          break;
 121       default:
 122          assert(0);
 123          return lp_build_undef(gallivm, type);
 124       }
 125
 126 #if HAVE_LLVM >= 0x0207
 127       cond = LLVMBuildFCmp(builder, op, a, b, "");
 128       res = LLVMBuildSExt(builder, cond, int_vec_type, "");
 129 #else
 130       if (type.length == 1) {
 131          cond = LLVMBuildFCmp(builder, op, a, b, "");
 132          res = LLVMBuildSExt(builder, cond, int_vec_type, "");
 133       }
 134       else {
 135          unsigned i;
 136
 137          res = LLVMGetUndef(int_vec_type);
 138
 139          debug_printf("%s: warning: using slow element-wise float"
 140                       " vector comparison\n", __FUNCTION__);
 141          for (i = 0; i < type.length; ++i) {
 142             LLVMValueRef index = lp_build_const_int32(gallivm, i);
 143             cond = LLVMBuildFCmp(builder, op,
 144                                  LLVMBuildExtractElement(builder, a, index, ""),
 145                                  LLVMBuildExtractElement(builder, b, index, ""),
 146                                  "");
 147             cond = LLVMBuildSelect(builder, cond,
 148                                    LLVMConstExtractElement(ones, index),
 149                                    LLVMConstExtractElement(zeros, index),
 150                                    "");
 151             res = LLVMBuildInsertElement(builder, res, cond, index, "");
 152          }
 153       }
 154 #endif
 155    }
 156    else {
 157       LLVMIntPredicate op;
 158       switch(func) {
 159       case PIPE_FUNC_EQUAL:
 160          op = LLVMIntEQ;
 161          break;
 162       case PIPE_FUNC_NOTEQUAL:
 163          op = LLVMIntNE;
 164          break;
 165       case PIPE_FUNC_LESS:
 166          op = type.sign ? LLVMIntSLT : LLVMIntULT;
 167          break;
 168       case PIPE_FUNC_LEQUAL:
 169          op = type.sign ? LLVMIntSLE : LLVMIntULE;
 170          break;
 171       case PIPE_FUNC_GREATER:
 172          op = type.sign ? LLVMIntSGT : LLVMIntUGT;
 173          break;
 174       case PIPE_FUNC_GEQUAL:
 175          op = type.sign ? LLVMIntSGE : LLVMIntUGE;
 176          break;
 177       default:
 178          assert(0);
 179          return lp_build_undef(gallivm, type);
 180       }
 181
 182 #if HAVE_LLVM >= 0x0207
 183       cond = LLVMBuildICmp(builder, op, a, b, "");
 184       res = LLVMBuildSExt(builder, cond, int_vec_type, "");
 185 #else
 186       if (type.length == 1) {
 187          cond = LLVMBuildICmp(builder, op, a, b, "");
 188          res = LLVMBuildSExt(builder, cond, int_vec_type, "");
 189       }
 190       else {
 191          unsigned i;
 192
 193          res = LLVMGetUndef(int_vec_type);
 194
 195          if (gallivm_debug & GALLIVM_DEBUG_PERF) {
 196             debug_printf("%s: using slow element-wise int"
 197                          " vector comparison\n", __FUNCTION__);
 198          }
 199
 200          for(i = 0; i < type.length; ++i) {
 201             LLVMValueRef index = lp_build_const_int32(gallivm, i);
 202             cond = LLVMBuildICmp(builder, op,
 203                                  LLVMBuildExtractElement(builder, a, index, ""),
 204                                  LLVMBuildExtractElement(builder, b, index, ""),
 205                                  "");
 206             cond = LLVMBuildSelect(builder, cond,
 207                                    LLVMConstExtractElement(ones, index),
 208                                    LLVMConstExtractElement(zeros, index),
 209                                    "");
 210             res = LLVMBuildInsertElement(builder, res, cond, index, "");
 211          }
 212       }
 213 #endif
 214    }
 215
 216    return res;
 217 }
 218
 219 /**
 220  * Build code to compare two values 'a' and 'b' of 'type' using the given func.
 221  * \param func  one of PIPE_FUNC_x
 222  * The result values will be 0 for false or ~0 for true.
 223  */
 224 LLVMValueRef
 225 lp_build_compare(struct gallivm_state *gallivm,
 226                  const struct lp_type type,
 227                  unsigned func,
 228                  LLVMValueRef a,
 229                  LLVMValueRef b)
 230 {
 231    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
 232    LLVMValueRef zeros = LLVMConstNull(int_vec_type);
 233    LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
 234
 235    assert(func >= PIPE_FUNC_NEVER);
 236    assert(func <= PIPE_FUNC_ALWAYS);
 237    assert(lp_check_value(type, a));
 238    assert(lp_check_value(type, b));
 239
 240    if(func == PIPE_FUNC_NEVER)
 241       return zeros;
 242    if(func == PIPE_FUNC_ALWAYS)
 243       return ones;
 244
 245 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 246    /*
 247     * There are no unsigned integer comparison instructions in SSE.
 248     */
 249
 250    if (!type.floating && !type.sign &&
 251        type.width * type.length == 128 &&
 252        util_cpu_caps.has_sse2 &&
 253        (func == PIPE_FUNC_LESS ||
 254         func == PIPE_FUNC_LEQUAL ||
 255         func == PIPE_FUNC_GREATER ||
 256         func == PIPE_FUNC_GEQUAL) &&
 257        (gallivm_debug & GALLIVM_DEBUG_PERF)) {
 258          debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
 259                       __FUNCTION__, type.length, type.width);
 260    }
 261 #endif
 262
 263 #if HAVE_LLVM < 0x0207
 264 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 265    if(type.width * type.length == 128) {
 266       LLVMBuilderRef builder = gallivm->builder;
 267       LLVMValueRef cond;
 268       LLVMValueRef res;
 269       if(type.floating && util_cpu_caps.has_sse) {
 270          /* float[4] comparison */
 271          LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type);
 272          LLVMValueRef args[3];
 273          unsigned cc;
 274          boolean swap;
 275
 276          swap = FALSE;
 277          switch(func) {
 278          case PIPE_FUNC_EQUAL:
 279             cc = 0;
 280             break;
 281          case PIPE_FUNC_NOTEQUAL:
 282             cc = 4;
 283             break;
 284          case PIPE_FUNC_LESS:
 285             cc = 1;
 286             break;
 287          case PIPE_FUNC_LEQUAL:
 288             cc = 2;
 289             break;
 290          case PIPE_FUNC_GREATER:
 291             cc = 1;
 292             swap = TRUE;
 293             break;
 294          case PIPE_FUNC_GEQUAL:
 295             cc = 2;
 296             swap = TRUE;
 297             break;
 298          default:
 299             assert(0);
 300             return lp_build_undef(gallivm, type);
 301          }
 302
 303          if(swap) {
 304             args[0] = b;
 305             args[1] = a;
 306          }
 307          else {
 308             args[0] = a;
 309             args[1] = b;
 310          }
 311
 312          args[2] = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), cc, 0);
 313          res = lp_build_intrinsic(builder,
 314                                   "llvm.x86.sse.cmp.ps",
 315                                   vec_type,
 316                                   args, 3);
 317          res = LLVMBuildBitCast(builder, res, int_vec_type, "");
 318          return res;
 319       }
 320       else if(util_cpu_caps.has_sse2) {
 321          /* int[4] comparison */
 322          static const struct {
 323             unsigned swap:1;
 324             unsigned eq:1;
 325             unsigned gt:1;
 326             unsigned not:1;
 327          } table[] = {
 328             {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */
 329             {1, 0, 1, 0}, /* PIPE_FUNC_LESS */
 330             {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */
 331             {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */
 332             {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */
 333             {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */
 334             {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */
 335             {0, 0, 0, 0}  /* PIPE_FUNC_ALWAYS */
 336          };
 337          const char *pcmpeq;
 338          const char *pcmpgt;
 339          LLVMValueRef args[2];
 340          LLVMValueRef res;
 341          LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type);
 342
 343          switch (type.width) {
 344          case 8:
 345             pcmpeq = "llvm.x86.sse2.pcmpeq.b";
 346             pcmpgt = "llvm.x86.sse2.pcmpgt.b";
 347             break;
 348          case 16:
 349             pcmpeq = "llvm.x86.sse2.pcmpeq.w";
 350             pcmpgt = "llvm.x86.sse2.pcmpgt.w";
 351             break;
 352          case 32:
 353             pcmpeq = "llvm.x86.sse2.pcmpeq.d";
 354             pcmpgt = "llvm.x86.sse2.pcmpgt.d";
 355             break;
 356          default:
 357             assert(0);
 358             return lp_build_undef(gallivm, type);
 359          }
 360
 361          /* There are no unsigned comparison instructions. So flip the sign bit
 362           * so that the results match.
 363           */
 364          if (table[func].gt && !type.sign) {
 365             LLVMValueRef msb = lp_build_const_int_vec(gallivm, type, (unsigned long long)1 << (type.width - 1));
 366             a = LLVMBuildXor(builder, a, msb, "");
 367             b = LLVMBuildXor(builder, b, msb, "");
 368          }
 369
 370          if(table[func].swap) {
 371             args[0] = b;
 372             args[1] = a;
 373          }
 374          else {
 375             args[0] = a;
 376             args[1] = b;
 377          }
 378
 379          if(table[func].eq)
 380             res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2);
 381          else if (table[func].gt)
 382             res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2);
 383          else
 384             res = LLVMConstNull(vec_type);
 385
 386          if(table[func].not)
 387             res = LLVMBuildNot(builder, res, "");
 388
 389          return res;
 390       }
 391    } /* if (type.width * type.length == 128) */
 392 #endif
 393 #endif /* HAVE_LLVM < 0x0207 */
 394
 395    return lp_build_compare_ext(gallivm, type, func, a, b, FALSE);
 396 }
 397
 398 /**
 399  * Build code to compare two values 'a' and 'b' using the given func.
 400  * \param func  one of PIPE_FUNC_x
 401  * If the operands are floating point numbers, the function will use
 402  * ordered comparison which means that it will return true if both
 403  * operands are not a NaN and the specified condition evaluates to true.
 404  * The result values will be 0 for false or ~0 for true.
 405  */
 406 LLVMValueRef
 407 lp_build_cmp_ordered(struct lp_build_context *bld,
 408                      unsigned func,
 409                      LLVMValueRef a,
 410                      LLVMValueRef b)
 411 {
 412    return lp_build_compare_ext(bld->gallivm, bld->type, func, a, b, TRUE);
 413 }
 414
 415 /**
 416  * Build code to compare two values 'a' and 'b' using the given func.
 417  * \param func  one of PIPE_FUNC_x
 418  * If the operands are floating point numbers, the function will use
 419  * unordered comparison which means that it will return true if either
 420  * operand is a NaN or the specified condition evaluates to true.
 421  * The result values will be 0 for false or ~0 for true.
 422  */
 423 LLVMValueRef
 424 lp_build_cmp(struct lp_build_context *bld,
 425              unsigned func,
 426              LLVMValueRef a,
 427              LLVMValueRef b)
 428 {
 429    return lp_build_compare(bld->gallivm, bld->type, func, a, b);
 430 }
 431
 432
 433 /**
 434  * Return (mask & a) | (~mask & b);
 435  */
 436 LLVMValueRef
 437 lp_build_select_bitwise(struct lp_build_context *bld,
 438                         LLVMValueRef mask,
 439                         LLVMValueRef a,
 440                         LLVMValueRef b)
 441 {
 442    LLVMBuilderRef builder = bld->gallivm->builder;
 443    struct lp_type type = bld->type;
 444    LLVMValueRef res;
 445
 446    assert(lp_check_value(type, a));
 447    assert(lp_check_value(type, b));
 448
 449    if (a == b) {
 450       return a;
 451    }
 452
 453    if(type.floating) {
 454       LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type);
 455       a = LLVMBuildBitCast(builder, a, int_vec_type, "");
 456       b = LLVMBuildBitCast(builder, b, int_vec_type, "");
 457    }
 458
 459    a = LLVMBuildAnd(builder, a, mask, "");
 460
 461    /* This often gets translated to PANDN, but sometimes the NOT is
 462     * pre-computed and stored in another constant. The best strategy depends
 463     * on available registers, so it is not a big deal -- hopefully LLVM does
 464     * the right decision attending the rest of the program.
 465     */
 466    b = LLVMBuildAnd(builder, b, LLVMBuildNot(builder, mask, ""), "");
 467
 468    res = LLVMBuildOr(builder, a, b, "");
 469
 470    if(type.floating) {
 471       LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type);
 472       res = LLVMBuildBitCast(builder, res, vec_type, "");
 473    }
 474
 475    return res;
 476 }
 477
 478
 479 /**
 480  * Return mask ? a : b;
 481  *
 482  * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value
 483  * will yield unpredictable results.
 484  */
 485 LLVMValueRef
 486 lp_build_select(struct lp_build_context *bld,
 487                 LLVMValueRef mask,
 488                 LLVMValueRef a,
 489                 LLVMValueRef b)
 490 {
 491    LLVMBuilderRef builder = bld->gallivm->builder;
 492    LLVMContextRef lc = bld->gallivm->context;
 493    struct lp_type type = bld->type;
 494    LLVMValueRef res;
 495
 496    assert(lp_check_value(type, a));
 497    assert(lp_check_value(type, b));
 498
 499    if(a == b)
 500       return a;
 501
 502    if (type.length == 1) {
 503       mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), "");
 504       res = LLVMBuildSelect(builder, mask, a, b, "");
 505    }
 506    else if (0) {
 507       /* Generate a vector select.
 508        *
 509        * XXX: Using vector selects would avoid emitting intrinsics, but they aren't
 510        * properly supported yet.
 511        *
 512        * LLVM 3.1 supports it, but it yields buggy code (e.g. lp_blend_test).
 513        *
 514        * LLVM 3.0 includes experimental support provided the -promote-elements
 515        * options is passed to LLVM's command line (e.g., via
 516        * llvm::cl::ParseCommandLineOptions), but resulting code quality is much
 517        * worse, probably because some optimization passes don't know how to
 518        * handle vector selects.
 519        *
 520        * See also:
 521        * - http://lists.cs.uiuc.edu/pipermail/llvmdev/2011-October/043659.html
 522        */
 523
 524       /* Convert the mask to a vector of booleans.
 525        * XXX: There are two ways to do this. Decide what's best.
 526        */
 527       if (1) {
 528          LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
 529          mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");
 530       } else {
 531          mask = LLVMBuildICmp(builder, LLVMIntNE, mask, LLVMConstNull(bld->int_vec_type), "");
 532       }
 533       res = LLVMBuildSelect(builder, mask, a, b, "");
 534    }
 535    else if (((util_cpu_caps.has_sse4_1 &&
 536               type.width * type.length == 128) ||
 537              (util_cpu_caps.has_avx &&
 538               type.width * type.length == 256 && type.width >= 32)) &&
 539             !LLVMIsConstant(a) &&
 540             !LLVMIsConstant(b) &&
 541             !LLVMIsConstant(mask)) {
 542       const char *intrinsic;
 543       LLVMTypeRef arg_type;
 544       LLVMValueRef args[3];
 545
 546       /*
 547        *  There's only float blend in AVX but can just cast i32/i64
 548        *  to float.
 549        */
 550       if (type.width * type.length == 256) {
 551          if (type.width == 64) {
 552            intrinsic = "llvm.x86.avx.blendv.pd.256";
 553            arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4);
 554          }
 555          else {
 556             intrinsic = "llvm.x86.avx.blendv.ps.256";
 557             arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8);
 558          }
 559       }
 560       else if (type.floating &&
 561                type.width == 64) {
 562          intrinsic = "llvm.x86.sse41.blendvpd";
 563          arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2);
 564       } else if (type.floating &&
 565                  type.width == 32) {
 566          intrinsic = "llvm.x86.sse41.blendvps";
 567          arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4);
 568       } else {
 569          intrinsic = "llvm.x86.sse41.pblendvb";
 570          arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16);
 571       }
 572
 573       if (arg_type != bld->int_vec_type) {
 574          mask = LLVMBuildBitCast(builder, mask, arg_type, "");
 575       }
 576
 577       if (arg_type != bld->vec_type) {
 578          a = LLVMBuildBitCast(builder, a, arg_type, "");
 579          b = LLVMBuildBitCast(builder, b, arg_type, "");
 580       }
 581
 582       args[0] = b;
 583       args[1] = a;
 584       args[2] = mask;
 585
 586       res = lp_build_intrinsic(builder, intrinsic,
 587                                arg_type, args, Elements(args));
 588
 589       if (arg_type != bld->vec_type) {
 590          res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
 591       }
 592    }
 593    else {
 594       res = lp_build_select_bitwise(bld, mask, a, b);
 595    }
 596
 597    return res;
 598 }
 599
 600
 601 /**
 602  * Return mask ? a : b;
 603  *
 604  * mask is a TGSI_WRITEMASK_xxx.
 605  */
 606 LLVMValueRef
 607 lp_build_select_aos(struct lp_build_context *bld,
 608                     unsigned mask,
 609                     LLVMValueRef a,
 610                     LLVMValueRef b,
 611                     unsigned num_channels)
 612 {
 613    LLVMBuilderRef builder = bld->gallivm->builder;
 614    const struct lp_type type = bld->type;
 615    const unsigned n = type.length;
 616    unsigned i, j;
 617
 618    assert((mask & ~0xf) == 0);
 619    assert(lp_check_value(type, a));
 620    assert(lp_check_value(type, b));
 621
 622    if(a == b)
 623       return a;
 624    if((mask & 0xf) == 0xf)
 625       return a;
 626    if((mask & 0xf) == 0x0)
 627       return b;
 628    if(a == bld->undef || b == bld->undef)
 629       return bld->undef;
 630
 631    /*
 632     * There are two major ways of accomplishing this:
 633     * - with a shuffle
 634     * - with a select
 635     *
 636     * The flip between these is empirical and might need to be adjusted.
 637     */
 638    if (n <= 4) {
 639       /*
 640        * Shuffle.
 641        */
 642       LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
 643       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
 644
 645       for(j = 0; j < n; j += num_channels)
 646          for(i = 0; i < num_channels; ++i)
 647             shuffles[j + i] = LLVMConstInt(elem_type,
 648                                            (mask & (1 << i) ? 0 : n) + j + i,
 649                                            0);
 650
 651       return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), "");
 652    }
 653    else {
 654       LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, num_channels);
 655       return lp_build_select(bld, mask_vec, a, b);
 656    }
 657 }
 658
 659
 660 /**
 661  * Return (scalar-cast)val ? true : false;
 662  */
 663 LLVMValueRef
 664 lp_build_any_true_range(struct lp_build_context *bld,
 665                         unsigned real_length,
 666                         LLVMValueRef val)
 667 {
 668    LLVMBuilderRef builder = bld->gallivm->builder;
 669    LLVMTypeRef scalar_type;
 670    LLVMTypeRef true_type;
 671
 672    assert(real_length <= bld->type.length);
 673
 674    true_type = LLVMIntTypeInContext(bld->gallivm->context,
 675                                     bld->type.width * real_length);
 676    scalar_type = LLVMIntTypeInContext(bld->gallivm->context,
 677                                       bld->type.width * bld->type.length);
 678    val = LLVMBuildBitCast(builder, val, scalar_type, "");
 679    /*
 680     * We're using always native types so we can use intrinsics.
 681     * However, if we don't do per-element calculations, we must ensure
 682     * the excess elements aren't used since they may contain garbage.
 683     */
 684    if (real_length < bld->type.length) {
 685       val = LLVMBuildTrunc(builder, val, true_type, "");
 686    }
 687    return LLVMBuildICmp(builder, LLVMIntNE,
 688                         val, LLVMConstNull(true_type), "");
 689 }