gallium: replace INLINE with inline
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_logic.c
/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Helper functions for logical operations.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */


#include "util/u_cpu_detect.h"
#include "util/u_memory.h"
#include "util/u_debug.h"

#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_init.h"
#include "lp_bld_intr.h"
#include "lp_bld_debug.h"
#include "lp_bld_logic.h"

/*
 * XXX
 *
 * Selection with a vector conditional, like
 *
 *    select <4 x i1> %C, %A, %B
 *
 * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is only
 * supported on some backends (x86) starting with llvm 3.1.
 *
 * Expanding the boolean vector to full SIMD register width, as in
 *
 *    sext <4 x i1> %C to <4 x i32>
 *
 * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
 * it causes assertion failures in LLVM 2.6. It appears to work correctly on
 * LLVM 2.7.
 */

/**
 * Build code to compare two values 'a' and 'b' of 'type' using the given func.
 * \param func one of PIPE_FUNC_x
 * If the ordered argument is true the function will use LLVM's ordered
 * comparisons, otherwise unordered comparisons will be used.
 * The result values will be 0 for false or ~0 for true.
 */
static LLVMValueRef
lp_build_compare_ext(struct gallivm_state *gallivm,
                     const struct lp_type type,
                     unsigned func,
                     LLVMValueRef a,
                     LLVMValueRef b,
                     boolean ordered)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
   LLVMValueRef cond;
   LLVMValueRef res;

   assert(func >= PIPE_FUNC_NEVER);
   assert(func <= PIPE_FUNC_ALWAYS);
   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if(func == PIPE_FUNC_NEVER)
      return zeros;
   if(func == PIPE_FUNC_ALWAYS)
      return ones;

   if(type.floating) {
      LLVMRealPredicate op;
      switch(func) {
      case PIPE_FUNC_EQUAL:
         op = ordered ? LLVMRealOEQ : LLVMRealUEQ;
         break;
      case PIPE_FUNC_NOTEQUAL:
         op = ordered ? LLVMRealONE : LLVMRealUNE;
         break;
      case PIPE_FUNC_LESS:
         op = ordered ? LLVMRealOLT : LLVMRealULT;
         break;
      case PIPE_FUNC_LEQUAL:
         op = ordered ? LLVMRealOLE : LLVMRealULE;
         break;
      case PIPE_FUNC_GREATER:
         op = ordered ? LLVMRealOGT : LLVMRealUGT;
         break;
      case PIPE_FUNC_GEQUAL:
         op = ordered ? LLVMRealOGE : LLVMRealUGE;
         break;
      default:
         assert(0);
         return lp_build_undef(gallivm, type);
      }

      cond = LLVMBuildFCmp(builder, op, a, b, "");
      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
   }
   else {
      LLVMIntPredicate op;
      switch(func) {
      case PIPE_FUNC_EQUAL:
         op = LLVMIntEQ;
         break;
      case PIPE_FUNC_NOTEQUAL:
         op = LLVMIntNE;
         break;
      case PIPE_FUNC_LESS:
         op = type.sign ? LLVMIntSLT : LLVMIntULT;
         break;
      case PIPE_FUNC_LEQUAL:
         op = type.sign ? LLVMIntSLE : LLVMIntULE;
         break;
      case PIPE_FUNC_GREATER:
         op = type.sign ? LLVMIntSGT : LLVMIntUGT;
         break;
      case PIPE_FUNC_GEQUAL:
         op = type.sign ? LLVMIntSGE : LLVMIntUGE;
         break;
      default:
         assert(0);
         return lp_build_undef(gallivm, type);
      }

      cond = LLVMBuildICmp(builder, op, a, b, "");
      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
   }

   return res;
}

/**
 * Build code to compare two values 'a' and 'b' of 'type' using the given func.
 * \param func one of PIPE_FUNC_x
 * The result values will be 0 for false or ~0 for true.
 */
LLVMValueRef
lp_build_compare(struct gallivm_state *gallivm,
                 const struct lp_type type,
                 unsigned func,
                 LLVMValueRef a,
                 LLVMValueRef b)
{
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);

   assert(func >= PIPE_FUNC_NEVER);
   assert(func <= PIPE_FUNC_ALWAYS);
   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if(func == PIPE_FUNC_NEVER)
      return zeros;
   if(func == PIPE_FUNC_ALWAYS)
      return ones;

#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   /*
    * There are no unsigned integer comparison instructions in SSE.
    */

   if (!type.floating && !type.sign &&
       type.width * type.length == 128 &&
       util_cpu_caps.has_sse2 &&
       (func == PIPE_FUNC_LESS ||
        func == PIPE_FUNC_LEQUAL ||
        func == PIPE_FUNC_GREATER ||
        func == PIPE_FUNC_GEQUAL) &&
       (gallivm_debug & GALLIVM_DEBUG_PERF)) {
      debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
                   __FUNCTION__, type.length, type.width);
   }
#endif

   return lp_build_compare_ext(gallivm, type, func, a, b, FALSE);
}
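
/*
 * Illustrative usage sketch (not part of the original file): because the
 * result is always an integer vector holding 0 or ~0 per element, two
 * comparison masks can be combined with plain bitwise operations.  Here
 * 'x', 'lo' and 'hi' are hypothetical values of the same 'type':
 *
 *    LLVMValueRef below = lp_build_compare(gallivm, type,
 *                                          PIPE_FUNC_LESS, x, lo);
 *    LLVMValueRef above = lp_build_compare(gallivm, type,
 *                                          PIPE_FUNC_GREATER, x, hi);
 *    LLVMValueRef out_of_range = LLVMBuildOr(gallivm->builder,
 *                                            below, above, "");
 */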

/**
 * Build code to compare two values 'a' and 'b' using the given func.
 * \param func one of PIPE_FUNC_x
 * If the operands are floating point numbers, the function will use
 * ordered comparison, which means it will return true only if neither
 * operand is a NaN and the specified condition evaluates to true.
 * The result values will be 0 for false or ~0 for true.
 */
LLVMValueRef
lp_build_cmp_ordered(struct lp_build_context *bld,
                     unsigned func,
                     LLVMValueRef a,
                     LLVMValueRef b)
{
   return lp_build_compare_ext(bld->gallivm, bld->type, func, a, b, TRUE);
}

/**
 * Build code to compare two values 'a' and 'b' using the given func.
 * \param func one of PIPE_FUNC_x
 * If the operands are floating point numbers, the function will use
 * unordered comparison, which means it will return true if either
 * operand is a NaN or the specified condition evaluates to true.
 * The result values will be 0 for false or ~0 for true.
 */
LLVMValueRef
lp_build_cmp(struct lp_build_context *bld,
             unsigned func,
             LLVMValueRef a,
             LLVMValueRef b)
{
   return lp_build_compare(bld->gallivm, bld->type, func, a, b);
}
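
/*
 * Illustrative usage sketch (not part of the original file): the 0/~0 masks
 * produced here are meant to be fed to lp_build_select() below.  For
 * example, a per-element max() over a hypothetical context 'bld' and values
 * 'x' and 'y' could be written as:
 *
 *    LLVMValueRef is_greater = lp_build_cmp(bld, PIPE_FUNC_GREATER, x, y);
 *    LLVMValueRef max_value  = lp_build_select(bld, is_greater, x, y);
 *
 * (lp_bld_arit.h already provides dedicated min/max helpers; this merely
 * demonstrates the mask convention.)
 */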


/**
 * Return (mask & a) | (~mask & b);
 */
LLVMValueRef
lp_build_select_bitwise(struct lp_build_context *bld,
                        LLVMValueRef mask,
                        LLVMValueRef a,
                        LLVMValueRef b)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_type type = bld->type;
   LLVMValueRef res;

   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if (a == b) {
      return a;
   }

   if(type.floating) {
      LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type);
      a = LLVMBuildBitCast(builder, a, int_vec_type, "");
      b = LLVMBuildBitCast(builder, b, int_vec_type, "");
   }

   a = LLVMBuildAnd(builder, a, mask, "");

   /* This often gets translated to PANDN, but sometimes the NOT is
    * pre-computed and stored in another constant. The best strategy depends
    * on available registers, so it is not a big deal -- hopefully LLVM makes
    * the right decision based on the rest of the program.
    */
   b = LLVMBuildAnd(builder, b, LLVMBuildNot(builder, mask, ""), "");

   res = LLVMBuildOr(builder, a, b, "");

   if(type.floating) {
      LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type);
      res = LLVMBuildBitCast(builder, res, vec_type, "");
   }

   return res;
}
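
/*
 * Illustrative usage sketch (not part of the original file): unlike
 * lp_build_select(), the mask here does not need to be 0/~0 per element --
 * any bit pattern works, since the operation is purely bitwise.  For
 * instance, a copysign-style merge for a hypothetical 32-bit float context
 * 'bld', taking the sign bit from 'a' and the magnitude from 'b':
 *
 *    LLVMValueRef sign_mask = lp_build_const_int_vec(bld->gallivm,
 *                                                    bld->type, 0x80000000);
 *    LLVMValueRef merged = lp_build_select_bitwise(bld, sign_mask, a, b);
 */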


/**
 * Return mask ? a : b;
 *
 * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value
 * will yield unpredictable results.
 */
LLVMValueRef
lp_build_select(struct lp_build_context *bld,
                LLVMValueRef mask,
                LLVMValueRef a,
                LLVMValueRef b)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMContextRef lc = bld->gallivm->context;
   struct lp_type type = bld->type;
   LLVMValueRef res;

   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if(a == b)
      return a;

   if (type.length == 1) {
      mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), "");
      res = LLVMBuildSelect(builder, mask, a, b, "");
   }
   else if (0) {
      /* Generate a vector select.
       *
       * XXX: Using vector selects would avoid emitting intrinsics, but they aren't
       * properly supported yet.
       *
       * LLVM 3.1 supports it, but it yields buggy code (e.g. lp_blend_test).
       *
       * LLVM 3.0 includes experimental support provided the -promote-elements
       * option is passed on LLVM's command line (e.g., via
       * llvm::cl::ParseCommandLineOptions), but the resulting code quality is much
       * worse, probably because some optimization passes don't know how to
       * handle vector selects.
       *
       * See also:
       * - http://lists.cs.uiuc.edu/pipermail/llvmdev/2011-October/043659.html
       */

      /* Convert the mask to a vector of booleans.
       * XXX: There are two ways to do this. Decide what's best.
       */
      if (1) {
         LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
         mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");
      } else {
         mask = LLVMBuildICmp(builder, LLVMIntNE, mask, LLVMConstNull(bld->int_vec_type), "");
      }
      res = LLVMBuildSelect(builder, mask, a, b, "");
   }
   else if (((util_cpu_caps.has_sse4_1 &&
              type.width * type.length == 128) ||
             (util_cpu_caps.has_avx &&
              type.width * type.length == 256 && type.width >= 32)) &&
            !LLVMIsConstant(a) &&
            !LLVMIsConstant(b) &&
            !LLVMIsConstant(mask)) {
      const char *intrinsic;
      LLVMTypeRef arg_type;
      LLVMValueRef args[3];

      /*
       * There's only float blend in AVX, but we can just cast i32/i64
       * to float.
       */
      if (type.width * type.length == 256) {
         if (type.width == 64) {
            intrinsic = "llvm.x86.avx.blendv.pd.256";
            arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4);
         }
         else {
            intrinsic = "llvm.x86.avx.blendv.ps.256";
            arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8);
         }
      }
      else if (type.floating &&
               type.width == 64) {
         intrinsic = "llvm.x86.sse41.blendvpd";
         arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2);
      } else if (type.floating &&
                 type.width == 32) {
         intrinsic = "llvm.x86.sse41.blendvps";
         arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4);
      } else {
         intrinsic = "llvm.x86.sse41.pblendvb";
         arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16);
      }

      if (arg_type != bld->int_vec_type) {
         mask = LLVMBuildBitCast(builder, mask, arg_type, "");
      }

      if (arg_type != bld->vec_type) {
         a = LLVMBuildBitCast(builder, a, arg_type, "");
         b = LLVMBuildBitCast(builder, b, arg_type, "");
      }

      args[0] = b;
      args[1] = a;
      args[2] = mask;

      res = lp_build_intrinsic(builder, intrinsic,
                               arg_type, args, Elements(args));

      if (arg_type != bld->vec_type) {
         res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
      }
   }
   else {
      res = lp_build_select_bitwise(bld, mask, a, b);
   }

   return res;
}
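
/*
 * Illustrative usage sketch (not part of the original file): a common
 * pattern in the generated shader code is a branchless conditional update,
 * where 'exec_mask' is a 0/~0 per-element mask (e.g. from lp_build_cmp())
 * and 'new_val'/'old_val' are hypothetical values of bld->type:
 *
 *    LLVMValueRef merged = lp_build_select(bld, exec_mask, new_val, old_val);
 *
 * Elements whose mask is ~0 receive new_val; the others keep old_val.
 */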


/**
 * Return mask ? a : b;
 *
 * mask is a TGSI_WRITEMASK_xxx.
 */
LLVMValueRef
lp_build_select_aos(struct lp_build_context *bld,
                    unsigned mask,
                    LLVMValueRef a,
                    LLVMValueRef b,
                    unsigned num_channels)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   const unsigned n = type.length;
   unsigned i, j;

   assert((mask & ~0xf) == 0);
   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if(a == b)
      return a;
   if((mask & 0xf) == 0xf)
      return a;
   if((mask & 0xf) == 0x0)
      return b;
   if(a == bld->undef || b == bld->undef)
      return bld->undef;

   /*
    * There are two major ways of accomplishing this:
    * - with a shuffle
    * - with a select
    *
    * The flip between these is empirical and might need to be adjusted.
    */
   if (n <= 4) {
      /*
       * Shuffle.
       */
      LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];

      for(j = 0; j < n; j += num_channels)
         for(i = 0; i < num_channels; ++i)
            shuffles[j + i] = LLVMConstInt(elem_type,
                                           (mask & (1 << i) ? 0 : n) + j + i,
                                           0);

      return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), "");
   }
   else {
      LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, num_channels);
      return lp_build_select(bld, mask_vec, a, b);
   }
}
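
/*
 * Illustrative usage sketch (not part of the original file): with a 4-channel
 * AoS layout and a hypothetical context 'bld', taking the x/y channels from
 * 'a' and the z/w channels from 'b' in every group of four elements:
 *
 *    LLVMValueRef res = lp_build_select_aos(bld, TGSI_WRITEMASK_XY, a, b, 4);
 */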


/**
 * Return (scalar-cast)val ? true : false;
 */
LLVMValueRef
lp_build_any_true_range(struct lp_build_context *bld,
                        unsigned real_length,
                        LLVMValueRef val)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMTypeRef scalar_type;
   LLVMTypeRef true_type;

   assert(real_length <= bld->type.length);

   true_type = LLVMIntTypeInContext(bld->gallivm->context,
                                    bld->type.width * real_length);
   scalar_type = LLVMIntTypeInContext(bld->gallivm->context,
                                      bld->type.width * bld->type.length);
   val = LLVMBuildBitCast(builder, val, scalar_type, "");
   /*
    * We're always using native types here, so we can use intrinsics.
    * However, since we don't do the calculation per element, we must
    * ensure the excess elements aren't used, as they may contain garbage.
    */
   if (real_length < bld->type.length) {
      val = LLVMBuildTrunc(builder, val, true_type, "");
   }
   return LLVMBuildICmp(builder, LLVMIntNE,
                        val, LLVMConstNull(true_type), "");
}
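
/*
 * Illustrative usage sketch (not part of the original file): the resulting
 * i1 value can feed a conditional branch directly, e.g. to skip work when
 * no element of a mask is active.  'mask_bld', 'num_active', 'mask_value',
 * 'do_work_block' and 'skip_block' are hypothetical:
 *
 *    LLVMValueRef any_active =
 *       lp_build_any_true_range(&mask_bld, num_active, mask_value);
 *    LLVMBuildCondBr(builder, any_active, do_work_block, skip_block);
 */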