src/gallium/auxiliary/gallivm/lp_bld_logic.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * Helper functions for logical operations.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  */
  34
  35
  36 #include "util/u_cpu_detect.h"
  37 #include "util/u_memory.h"
  38 #include "util/u_debug.h"
  39
  40 #include "lp_bld_type.h"
  41 #include "lp_bld_const.h"
  42 #include "lp_bld_swizzle.h"
  43 #include "lp_bld_init.h"
  44 #include "lp_bld_intr.h"
  45 #include "lp_bld_debug.h"
  46 #include "lp_bld_logic.h"
  47
  48
  49 /*
  50  * XXX
  51  *
  52  * Selection with vector conditional like
  53  *
  54  *    select <4 x i1> %C, %A, %B
  55  *
  56  * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is only
  57  * supported on some backends (x86) starting with llvm 3.1.
  58  *
  59  * Expanding the boolean vector to full SIMD register width, as in
  60  *
  61  *    sext <4 x i1> %C to <4 x i32>
  62  *
  63  * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
  64  * it causes assertion failures in LLVM 2.6. It appears to work correctly on
  65  * LLVM 2.7.
  66  */
  67
  68
  69 /**
  70  * Build code to compare two values 'a' and 'b' of 'type' using the given func.
  71  * \param func  one of PIPE_FUNC_x
  72  * If the ordered argument is true the function will use LLVM's ordered
  73  * comparisons, otherwise unordered comparisons will be used.
  74  * The result values will be 0 for false or ~0 for true.
  75  */
  76 static LLVMValueRef
  77 lp_build_compare_ext(struct gallivm_state *gallivm,
  78                      const struct lp_type type,
  79                      unsigned func,
  80                      LLVMValueRef a,
  81                      LLVMValueRef b,
  82                      boolean ordered)
  83 {
  84    LLVMBuilderRef builder = gallivm->builder;
  85    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
  86    LLVMValueRef zeros = LLVMConstNull(int_vec_type);
  87    LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
  88    LLVMValueRef cond;
  89    LLVMValueRef res;
  90
  91    assert(lp_check_value(type, a));
  92    assert(lp_check_value(type, b));
  93
  94    if(func == PIPE_FUNC_NEVER)
  95       return zeros;
  96    if(func == PIPE_FUNC_ALWAYS)
  97       return ones;
  98
  99    assert(func > PIPE_FUNC_NEVER);
 100    assert(func < PIPE_FUNC_ALWAYS);
 101
 102    if(type.floating) {
 103       LLVMRealPredicate op;
 104       switch(func) {
 105       case PIPE_FUNC_EQUAL:
 106          op = ordered ? LLVMRealOEQ : LLVMRealUEQ;
 107          break;
 108       case PIPE_FUNC_NOTEQUAL:
 109          op = ordered ? LLVMRealONE : LLVMRealUNE;
 110          break;
 111       case PIPE_FUNC_LESS:
 112          op = ordered ? LLVMRealOLT : LLVMRealULT;
 113          break;
 114       case PIPE_FUNC_LEQUAL:
 115          op = ordered ? LLVMRealOLE : LLVMRealULE;
 116          break;
 117       case PIPE_FUNC_GREATER:
 118          op = ordered ? LLVMRealOGT : LLVMRealUGT;
 119          break;
 120       case PIPE_FUNC_GEQUAL:
 121          op = ordered ? LLVMRealOGE : LLVMRealUGE;
 122          break;
 123       default:
 124          assert(0);
 125          return lp_build_undef(gallivm, type);
 126       }
 127
 128       cond = LLVMBuildFCmp(builder, op, a, b, "");
 129       res = LLVMBuildSExt(builder, cond, int_vec_type, "");
 130    }
 131    else {
 132       LLVMIntPredicate op;
 133       switch(func) {
 134       case PIPE_FUNC_EQUAL:
 135          op = LLVMIntEQ;
 136          break;
 137       case PIPE_FUNC_NOTEQUAL:
 138          op = LLVMIntNE;
 139          break;
 140       case PIPE_FUNC_LESS:
 141          op = type.sign ? LLVMIntSLT : LLVMIntULT;
 142          break;
 143       case PIPE_FUNC_LEQUAL:
 144          op = type.sign ? LLVMIntSLE : LLVMIntULE;
 145          break;
 146       case PIPE_FUNC_GREATER:
 147          op = type.sign ? LLVMIntSGT : LLVMIntUGT;
 148          break;
 149       case PIPE_FUNC_GEQUAL:
 150          op = type.sign ? LLVMIntSGE : LLVMIntUGE;
 151          break;
 152       default:
 153          assert(0);
 154          return lp_build_undef(gallivm, type);
 155       }
 156
 157       cond = LLVMBuildICmp(builder, op, a, b, "");
 158       res = LLVMBuildSExt(builder, cond, int_vec_type, "");
 159    }
 160
 161    return res;
 162 }
 163
 164 /**
 165  * Build code to compare two values 'a' and 'b' of 'type' using the given func.
 166  * \param func  one of PIPE_FUNC_x
 167  * The result values will be 0 for false or ~0 for true.
 168  */
 169 LLVMValueRef
 170 lp_build_compare(struct gallivm_state *gallivm,
 171                  const struct lp_type type,
 172                  unsigned func,
 173                  LLVMValueRef a,
 174                  LLVMValueRef b)
 175 {
 176    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
 177    LLVMValueRef zeros = LLVMConstNull(int_vec_type);
 178    LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
 179
 180    assert(lp_check_value(type, a));
 181    assert(lp_check_value(type, b));
 182
 183    if(func == PIPE_FUNC_NEVER)
 184       return zeros;
 185    if(func == PIPE_FUNC_ALWAYS)
 186       return ones;
 187
 188    assert(func > PIPE_FUNC_NEVER);
 189    assert(func < PIPE_FUNC_ALWAYS);
 190
 191 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 192    /*
 193     * There are no unsigned integer comparison instructions in SSE.
 194     */
 195
 196    if (!type.floating && !type.sign &&
 197        type.width * type.length == 128 &&
 198        util_cpu_caps.has_sse2 &&
 199        (func == PIPE_FUNC_LESS ||
 200         func == PIPE_FUNC_LEQUAL ||
 201         func == PIPE_FUNC_GREATER ||
 202         func == PIPE_FUNC_GEQUAL) &&
 203        (gallivm_debug & GALLIVM_DEBUG_PERF)) {
 204          debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
 205                       __FUNCTION__, type.length, type.width);
 206    }
 207 #endif
 208
 209    return lp_build_compare_ext(gallivm, type, func, a, b, FALSE);
 210 }
 211
 212 /**
 213  * Build code to compare two values 'a' and 'b' using the given func.
 214  * \param func  one of PIPE_FUNC_x
 215  * If the operands are floating point numbers, the function will use
 216  * ordered comparison which means that it will return true if both
 217  * operands are not a NaN and the specified condition evaluates to true.
 218  * The result values will be 0 for false or ~0 for true.
 219  */
 220 LLVMValueRef
 221 lp_build_cmp_ordered(struct lp_build_context *bld,
 222                      unsigned func,
 223                      LLVMValueRef a,
 224                      LLVMValueRef b)
 225 {
 226    return lp_build_compare_ext(bld->gallivm, bld->type, func, a, b, TRUE);
 227 }
 228
 229 /**
 230  * Build code to compare two values 'a' and 'b' using the given func.
 231  * \param func  one of PIPE_FUNC_x
 232  * If the operands are floating point numbers, the function will use
 233  * unordered comparison which means that it will return true if either
 234  * operand is a NaN or the specified condition evaluates to true.
 235  * The result values will be 0 for false or ~0 for true.
 236  */
 237 LLVMValueRef
 238 lp_build_cmp(struct lp_build_context *bld,
 239              unsigned func,
 240              LLVMValueRef a,
 241              LLVMValueRef b)
 242 {
 243    return lp_build_compare(bld->gallivm, bld->type, func, a, b);
 244 }
 245
 246
 247 /**
 248  * Return (mask & a) | (~mask & b);
 249  */
 250 LLVMValueRef
 251 lp_build_select_bitwise(struct lp_build_context *bld,
 252                         LLVMValueRef mask,
 253                         LLVMValueRef a,
 254                         LLVMValueRef b)
 255 {
 256    LLVMBuilderRef builder = bld->gallivm->builder;
 257    struct lp_type type = bld->type;
 258    LLVMValueRef res;
 259
 260    assert(lp_check_value(type, a));
 261    assert(lp_check_value(type, b));
 262
 263    if (a == b) {
 264       return a;
 265    }
 266
 267    if(type.floating) {
 268       LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type);
 269       a = LLVMBuildBitCast(builder, a, int_vec_type, "");
 270       b = LLVMBuildBitCast(builder, b, int_vec_type, "");
 271    }
 272
 273    a = LLVMBuildAnd(builder, a, mask, "");
 274
 275    /* This often gets translated to PANDN, but sometimes the NOT is
 276     * pre-computed and stored in another constant. The best strategy depends
 277     * on available registers, so it is not a big deal -- hopefully LLVM does
 278     * the right decision attending the rest of the program.
 279     */
 280    b = LLVMBuildAnd(builder, b, LLVMBuildNot(builder, mask, ""), "");
 281
 282    res = LLVMBuildOr(builder, a, b, "");
 283
 284    if(type.floating) {
 285       LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type);
 286       res = LLVMBuildBitCast(builder, res, vec_type, "");
 287    }
 288
 289    return res;
 290 }
 291
 292
 293 /**
 294  * Return mask ? a : b;
 295  *
 296  * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value
 297  * will yield unpredictable results.
 298  */
 299 LLVMValueRef
 300 lp_build_select(struct lp_build_context *bld,
 301                 LLVMValueRef mask,
 302                 LLVMValueRef a,
 303                 LLVMValueRef b)
 304 {
 305    LLVMBuilderRef builder = bld->gallivm->builder;
 306    LLVMContextRef lc = bld->gallivm->context;
 307    struct lp_type type = bld->type;
 308    LLVMValueRef res;
 309
 310    assert(lp_check_value(type, a));
 311    assert(lp_check_value(type, b));
 312
 313    if(a == b)
 314       return a;
 315
 316    if (type.length == 1) {
 317       mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), "");
 318       res = LLVMBuildSelect(builder, mask, a, b, "");
 319    }
 320    else if (!(HAVE_LLVM == 0x0307) &&
 321             (LLVMIsConstant(mask) ||
 322              LLVMGetInstructionOpcode(mask) == LLVMSExt)) {
 323       /* Generate a vector select.
 324        *
 325        * Using vector selects should avoid emitting intrinsics hence avoid
 326        * hindering optimization passes, but vector selects weren't properly
 327        * supported yet for a long time, and LLVM will generate poor code when
 328        * the mask is not the result of a comparison.
 329        * Also, llvm 3.7 may miscompile them (bug 94972).
 330        * XXX: Even if the instruction was an SExt, this may still produce
 331        * terrible code. Try piglit stencil-twoside.
 332        */
 333
 334       /* Convert the mask to a vector of booleans.
 335        *
 336        * XXX: In x86 the mask is controlled by the MSB, so if we shifted the
 337        * mask by `type.width - 1`, LLVM should realize the mask is ready.  Alas
 338        * what really happens is that LLVM will emit two shifts back to back.
 339        */
 340       if (0) {
 341          LLVMValueRef shift = LLVMConstInt(bld->int_elem_type, bld->type.width - 1, 0);
 342          shift = lp_build_broadcast(bld->gallivm, bld->int_vec_type, shift);
 343          mask = LLVMBuildLShr(builder, mask, shift, "");
 344       }
 345       LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
 346       mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");
 347
 348       res = LLVMBuildSelect(builder, mask, a, b, "");
 349    }
 350    else if (((util_cpu_caps.has_sse4_1 &&
 351               type.width * type.length == 128) ||
 352              (util_cpu_caps.has_avx &&
 353               type.width * type.length == 256 && type.width >= 32) ||
 354              (util_cpu_caps.has_avx2 &&
 355               type.width * type.length == 256)) &&
 356             !LLVMIsConstant(a) &&
 357             !LLVMIsConstant(b) &&
 358             !LLVMIsConstant(mask)) {
 359       const char *intrinsic;
 360       LLVMTypeRef arg_type;
 361       LLVMValueRef args[3];
 362
 363       /*
 364        *  There's only float blend in AVX but can just cast i32/i64
 365        *  to float.
 366        */
 367       if (type.width * type.length == 256) {
 368          if (type.width == 64) {
 369            intrinsic = "llvm.x86.avx.blendv.pd.256";
 370            arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4);
 371          }
 372          else if (type.width == 32) {
 373             intrinsic = "llvm.x86.avx.blendv.ps.256";
 374             arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8);
 375          } else {
 376             assert(util_cpu_caps.has_avx2);
 377             intrinsic = "llvm.x86.avx2.pblendvb";
 378             arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 32);
 379          }
 380       }
 381       else if (type.floating &&
 382                type.width == 64) {
 383          intrinsic = "llvm.x86.sse41.blendvpd";
 384          arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2);
 385       } else if (type.floating &&
 386                  type.width == 32) {
 387          intrinsic = "llvm.x86.sse41.blendvps";
 388          arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4);
 389       } else {
 390          intrinsic = "llvm.x86.sse41.pblendvb";
 391          arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16);
 392       }
 393
 394       if (arg_type != bld->int_vec_type) {
 395          mask = LLVMBuildBitCast(builder, mask, arg_type, "");
 396       }
 397
 398       if (arg_type != bld->vec_type) {
 399          a = LLVMBuildBitCast(builder, a, arg_type, "");
 400          b = LLVMBuildBitCast(builder, b, arg_type, "");
 401       }
 402
 403       args[0] = b;
 404       args[1] = a;
 405       args[2] = mask;
 406
 407       res = lp_build_intrinsic(builder, intrinsic,
 408                                arg_type, args, ARRAY_SIZE(args), 0);
 409
 410       if (arg_type != bld->vec_type) {
 411          res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
 412       }
 413    }
 414    else {
 415       res = lp_build_select_bitwise(bld, mask, a, b);
 416    }
 417
 418    return res;
 419 }
 420
 421
 422 /**
 423  * Return mask ? a : b;
 424  *
 425  * mask is a TGSI_WRITEMASK_xxx.
 426  */
 427 LLVMValueRef
 428 lp_build_select_aos(struct lp_build_context *bld,
 429                     unsigned mask,
 430                     LLVMValueRef a,
 431                     LLVMValueRef b,
 432                     unsigned num_channels)
 433 {
 434    LLVMBuilderRef builder = bld->gallivm->builder;
 435    const struct lp_type type = bld->type;
 436    const unsigned n = type.length;
 437    unsigned i, j;
 438
 439    assert((mask & ~0xf) == 0);
 440    assert(lp_check_value(type, a));
 441    assert(lp_check_value(type, b));
 442
 443    if(a == b)
 444       return a;
 445    if((mask & 0xf) == 0xf)
 446       return a;
 447    if((mask & 0xf) == 0x0)
 448       return b;
 449    if(a == bld->undef || b == bld->undef)
 450       return bld->undef;
 451
 452    /*
 453     * There are two major ways of accomplishing this:
 454     * - with a shuffle
 455     * - with a select
 456     *
 457     * The flip between these is empirical and might need to be adjusted.
 458     */
 459    if (n <= 4) {
 460       /*
 461        * Shuffle.
 462        */
 463       LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
 464       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
 465
 466       for(j = 0; j < n; j += num_channels)
 467          for(i = 0; i < num_channels; ++i)
 468             shuffles[j + i] = LLVMConstInt(elem_type,
 469                                            (mask & (1 << i) ? 0 : n) + j + i,
 470                                            0);
 471
 472       return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), "");
 473    }
 474    else {
 475       LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, num_channels);
 476       return lp_build_select(bld, mask_vec, a, b);
 477    }
 478 }
 479
 480
 481 /**
 482  * Return (scalar-cast)val ? true : false;
 483  */
 484 LLVMValueRef
 485 lp_build_any_true_range(struct lp_build_context *bld,
 486                         unsigned real_length,
 487                         LLVMValueRef val)
 488 {
 489    LLVMBuilderRef builder = bld->gallivm->builder;
 490    LLVMTypeRef scalar_type;
 491    LLVMTypeRef true_type;
 492
 493    assert(real_length <= bld->type.length);
 494
 495    true_type = LLVMIntTypeInContext(bld->gallivm->context,
 496                                     bld->type.width * real_length);
 497    scalar_type = LLVMIntTypeInContext(bld->gallivm->context,
 498                                       bld->type.width * bld->type.length);
 499    val = LLVMBuildBitCast(builder, val, scalar_type, "");
 500    /*
 501     * We're using always native types so we can use intrinsics.
 502     * However, if we don't do per-element calculations, we must ensure
 503     * the excess elements aren't used since they may contain garbage.
 504     */
 505    if (real_length < bld->type.length) {
 506       val = LLVMBuildTrunc(builder, val, true_type, "");
 507    }
 508    return LLVMBuildICmp(builder, LLVMIntNE,
 509                         val, LLVMConstNull(true_type), "");
 510 }