src/gallium/auxiliary/gallivm/lp_bld_logic.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * Helper functions for logical operations.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  */
  34
  35 #include <llvm/Config/llvm-config.h>
  36
  37 #include "util/u_cpu_detect.h"
  38 #include "util/u_memory.h"
  39 #include "util/u_debug.h"
  40
  41 #include "lp_bld_type.h"
  42 #include "lp_bld_const.h"
  43 #include "lp_bld_swizzle.h"
  44 #include "lp_bld_init.h"
  45 #include "lp_bld_intr.h"
  46 #include "lp_bld_debug.h"
  47 #include "lp_bld_logic.h"
  48
  49
  50 /*
  51  * XXX
  52  *
  53  * Selection with vector conditional like
  54  *
  55  *    select <4 x i1> %C, %A, %B
  56  *
  57  * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is only
  58  * supported on some backends (x86) starting with llvm 3.1.
  59  *
  60  * Expanding the boolean vector to full SIMD register width, as in
  61  *
  62  *    sext <4 x i1> %C to <4 x i32>
  63  *
  64  * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
  65  * it causes assertion failures in LLVM 2.6. It appears to work correctly on
  66  * LLVM 2.7.
  67  */
  68
  69
  70 /**
  71  * Build code to compare two values 'a' and 'b' of 'type' using the given func.
  72  * \param func  one of PIPE_FUNC_x
  73  * If the ordered argument is true the function will use LLVM's ordered
  74  * comparisons, otherwise unordered comparisons will be used.
  75  * The result values will be 0 for false or ~0 for true.
  76  */
  77 static LLVMValueRef
  78 lp_build_compare_ext(struct gallivm_state *gallivm,
  79                      const struct lp_type type,
  80                      unsigned func,
  81                      LLVMValueRef a,
  82                      LLVMValueRef b,
  83                      boolean ordered)
  84 {
  85    LLVMBuilderRef builder = gallivm->builder;
  86    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
  87    LLVMValueRef zeros = LLVMConstNull(int_vec_type);
  88    LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
  89    LLVMValueRef cond;
  90    LLVMValueRef res;
  91
  92    assert(lp_check_value(type, a));
  93    assert(lp_check_value(type, b));
  94
  95    if(func == PIPE_FUNC_NEVER)
  96       return zeros;
  97    if(func == PIPE_FUNC_ALWAYS)
  98       return ones;
  99
 100    assert(func > PIPE_FUNC_NEVER);
 101    assert(func < PIPE_FUNC_ALWAYS);
 102
 103    if(type.floating) {
 104       LLVMRealPredicate op;
 105       switch(func) {
 106       case PIPE_FUNC_EQUAL:
 107          op = ordered ? LLVMRealOEQ : LLVMRealUEQ;
 108          break;
 109       case PIPE_FUNC_NOTEQUAL:
 110          op = ordered ? LLVMRealONE : LLVMRealUNE;
 111          break;
 112       case PIPE_FUNC_LESS:
 113          op = ordered ? LLVMRealOLT : LLVMRealULT;
 114          break;
 115       case PIPE_FUNC_LEQUAL:
 116          op = ordered ? LLVMRealOLE : LLVMRealULE;
 117          break;
 118       case PIPE_FUNC_GREATER:
 119          op = ordered ? LLVMRealOGT : LLVMRealUGT;
 120          break;
 121       case PIPE_FUNC_GEQUAL:
 122          op = ordered ? LLVMRealOGE : LLVMRealUGE;
 123          break;
 124       default:
 125          assert(0);
 126          return lp_build_undef(gallivm, type);
 127       }
 128
 129       cond = LLVMBuildFCmp(builder, op, a, b, "");
 130       res = LLVMBuildSExt(builder, cond, int_vec_type, "");
 131    }
 132    else {
 133       LLVMIntPredicate op;
 134       switch(func) {
 135       case PIPE_FUNC_EQUAL:
 136          op = LLVMIntEQ;
 137          break;
 138       case PIPE_FUNC_NOTEQUAL:
 139          op = LLVMIntNE;
 140          break;
 141       case PIPE_FUNC_LESS:
 142          op = type.sign ? LLVMIntSLT : LLVMIntULT;
 143          break;
 144       case PIPE_FUNC_LEQUAL:
 145          op = type.sign ? LLVMIntSLE : LLVMIntULE;
 146          break;
 147       case PIPE_FUNC_GREATER:
 148          op = type.sign ? LLVMIntSGT : LLVMIntUGT;
 149          break;
 150       case PIPE_FUNC_GEQUAL:
 151          op = type.sign ? LLVMIntSGE : LLVMIntUGE;
 152          break;
 153       default:
 154          assert(0);
 155          return lp_build_undef(gallivm, type);
 156       }
 157
 158       cond = LLVMBuildICmp(builder, op, a, b, "");
 159       res = LLVMBuildSExt(builder, cond, int_vec_type, "");
 160    }
 161
 162    return res;
 163 }
 164
 165 /**
 166  * Build code to compare two values 'a' and 'b' of 'type' using the given func.
 167  * \param func  one of PIPE_FUNC_x
 168  * The result values will be 0 for false or ~0 for true.
 169  */
 170 LLVMValueRef
 171 lp_build_compare(struct gallivm_state *gallivm,
 172                  const struct lp_type type,
 173                  unsigned func,
 174                  LLVMValueRef a,
 175                  LLVMValueRef b)
 176 {
 177    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
 178    LLVMValueRef zeros = LLVMConstNull(int_vec_type);
 179    LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
 180
 181    assert(lp_check_value(type, a));
 182    assert(lp_check_value(type, b));
 183
 184    if(func == PIPE_FUNC_NEVER)
 185       return zeros;
 186    if(func == PIPE_FUNC_ALWAYS)
 187       return ones;
 188
 189    assert(func > PIPE_FUNC_NEVER);
 190    assert(func < PIPE_FUNC_ALWAYS);
 191
 192 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 193    /*
 194     * There are no unsigned integer comparison instructions in SSE.
 195     */
 196
 197    if (!type.floating && !type.sign &&
 198        type.width * type.length == 128 &&
 199        util_cpu_caps.has_sse2 &&
 200        (func == PIPE_FUNC_LESS ||
 201         func == PIPE_FUNC_LEQUAL ||
 202         func == PIPE_FUNC_GREATER ||
 203         func == PIPE_FUNC_GEQUAL) &&
 204        (gallivm_debug & GALLIVM_DEBUG_PERF)) {
 205          debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
 206                       __FUNCTION__, type.length, type.width);
 207    }
 208 #endif
 209
 210    return lp_build_compare_ext(gallivm, type, func, a, b, FALSE);
 211 }
 212
 213 /**
 214  * Build code to compare two values 'a' and 'b' using the given func.
 215  * \param func  one of PIPE_FUNC_x
 216  * If the operands are floating point numbers, the function will use
 217  * ordered comparison which means that it will return true if both
 218  * operands are not a NaN and the specified condition evaluates to true.
 219  * The result values will be 0 for false or ~0 for true.
 220  */
 221 LLVMValueRef
 222 lp_build_cmp_ordered(struct lp_build_context *bld,
 223                      unsigned func,
 224                      LLVMValueRef a,
 225                      LLVMValueRef b)
 226 {
 227    return lp_build_compare_ext(bld->gallivm, bld->type, func, a, b, TRUE);
 228 }
 229
 230 /**
 231  * Build code to compare two values 'a' and 'b' using the given func.
 232  * \param func  one of PIPE_FUNC_x
 233  * If the operands are floating point numbers, the function will use
 234  * unordered comparison which means that it will return true if either
 235  * operand is a NaN or the specified condition evaluates to true.
 236  * The result values will be 0 for false or ~0 for true.
 237  */
 238 LLVMValueRef
 239 lp_build_cmp(struct lp_build_context *bld,
 240              unsigned func,
 241              LLVMValueRef a,
 242              LLVMValueRef b)
 243 {
 244    return lp_build_compare(bld->gallivm, bld->type, func, a, b);
 245 }
 246
 247
 248 /**
 249  * Return (mask & a) | (~mask & b);
 250  */
 251 LLVMValueRef
 252 lp_build_select_bitwise(struct lp_build_context *bld,
 253                         LLVMValueRef mask,
 254                         LLVMValueRef a,
 255                         LLVMValueRef b)
 256 {
 257    LLVMBuilderRef builder = bld->gallivm->builder;
 258    struct lp_type type = bld->type;
 259    LLVMValueRef res;
 260
 261    assert(lp_check_value(type, a));
 262    assert(lp_check_value(type, b));
 263
 264    if (a == b) {
 265       return a;
 266    }
 267
 268    if(type.floating) {
 269       LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type);
 270       a = LLVMBuildBitCast(builder, a, int_vec_type, "");
 271       b = LLVMBuildBitCast(builder, b, int_vec_type, "");
 272    }
 273
 274    a = LLVMBuildAnd(builder, a, mask, "");
 275
 276    /* This often gets translated to PANDN, but sometimes the NOT is
 277     * pre-computed and stored in another constant. The best strategy depends
 278     * on available registers, so it is not a big deal -- hopefully LLVM does
 279     * the right decision attending the rest of the program.
 280     */
 281    b = LLVMBuildAnd(builder, b, LLVMBuildNot(builder, mask, ""), "");
 282
 283    res = LLVMBuildOr(builder, a, b, "");
 284
 285    if(type.floating) {
 286       LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type);
 287       res = LLVMBuildBitCast(builder, res, vec_type, "");
 288    }
 289
 290    return res;
 291 }
 292
 293
 294 /**
 295  * Return mask ? a : b;
 296  *
 297  * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value
 298  * will yield unpredictable results.
 299  */
 300 LLVMValueRef
 301 lp_build_select(struct lp_build_context *bld,
 302                 LLVMValueRef mask,
 303                 LLVMValueRef a,
 304                 LLVMValueRef b)
 305 {
 306    LLVMBuilderRef builder = bld->gallivm->builder;
 307    LLVMContextRef lc = bld->gallivm->context;
 308    struct lp_type type = bld->type;
 309    LLVMValueRef res;
 310
 311    assert(lp_check_value(type, a));
 312    assert(lp_check_value(type, b));
 313
 314    if(a == b)
 315       return a;
 316
 317    if (type.length == 1) {
 318       mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), "");
 319       res = LLVMBuildSelect(builder, mask, a, b, "");
 320    }
 321    else if (LLVMIsConstant(mask) ||
 322             LLVMGetInstructionOpcode(mask) == LLVMSExt) {
 323       /* Generate a vector select.
 324        *
 325        * Using vector selects should avoid emitting intrinsics hence avoid
 326        * hindering optimization passes, but vector selects weren't properly
 327        * supported yet for a long time, and LLVM will generate poor code when
 328        * the mask is not the result of a comparison.
 329        * XXX: Even if the instruction was an SExt, this may still produce
 330        * terrible code. Try piglit stencil-twoside.
 331        */
 332
 333       /* Convert the mask to a vector of booleans.
 334        *
 335        * XXX: In x86 the mask is controlled by the MSB, so if we shifted the
 336        * mask by `type.width - 1`, LLVM should realize the mask is ready.  Alas
 337        * what really happens is that LLVM will emit two shifts back to back.
 338        */
 339       if (0) {
 340          LLVMValueRef shift = LLVMConstInt(bld->int_elem_type, bld->type.width - 1, 0);
 341          shift = lp_build_broadcast(bld->gallivm, bld->int_vec_type, shift);
 342          mask = LLVMBuildLShr(builder, mask, shift, "");
 343       }
 344       LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
 345       mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");
 346
 347       res = LLVMBuildSelect(builder, mask, a, b, "");
 348    }
 349    else if (((util_cpu_caps.has_sse4_1 &&
 350               type.width * type.length == 128) ||
 351              (util_cpu_caps.has_avx &&
 352               type.width * type.length == 256 && type.width >= 32) ||
 353              (util_cpu_caps.has_avx2 &&
 354               type.width * type.length == 256)) &&
 355             !LLVMIsConstant(a) &&
 356             !LLVMIsConstant(b) &&
 357             !LLVMIsConstant(mask)) {
 358       const char *intrinsic;
 359       LLVMTypeRef arg_type;
 360       LLVMValueRef args[3];
 361
 362       /*
 363        *  There's only float blend in AVX but can just cast i32/i64
 364        *  to float.
 365        */
 366       if (type.width * type.length == 256) {
 367          if (type.width == 64) {
 368            intrinsic = "llvm.x86.avx.blendv.pd.256";
 369            arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4);
 370          }
 371          else if (type.width == 32) {
 372             intrinsic = "llvm.x86.avx.blendv.ps.256";
 373             arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8);
 374          } else {
 375             assert(util_cpu_caps.has_avx2);
 376             intrinsic = "llvm.x86.avx2.pblendvb";
 377             arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 32);
 378          }
 379       }
 380       else if (type.floating &&
 381                type.width == 64) {
 382          intrinsic = "llvm.x86.sse41.blendvpd";
 383          arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2);
 384       } else if (type.floating &&
 385                  type.width == 32) {
 386          intrinsic = "llvm.x86.sse41.blendvps";
 387          arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4);
 388       } else {
 389          intrinsic = "llvm.x86.sse41.pblendvb";
 390          arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16);
 391       }
 392
 393       if (arg_type != bld->int_vec_type) {
 394          mask = LLVMBuildBitCast(builder, mask, arg_type, "");
 395       }
 396
 397       if (arg_type != bld->vec_type) {
 398          a = LLVMBuildBitCast(builder, a, arg_type, "");
 399          b = LLVMBuildBitCast(builder, b, arg_type, "");
 400       }
 401
 402       args[0] = b;
 403       args[1] = a;
 404       args[2] = mask;
 405
 406       res = lp_build_intrinsic(builder, intrinsic,
 407                                arg_type, args, ARRAY_SIZE(args), 0);
 408
 409       if (arg_type != bld->vec_type) {
 410          res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
 411       }
 412    }
 413    else {
 414       res = lp_build_select_bitwise(bld, mask, a, b);
 415    }
 416
 417    return res;
 418 }
 419
 420
 421 /**
 422  * Return mask ? a : b;
 423  *
 424  * mask is a TGSI_WRITEMASK_xxx.
 425  */
 426 LLVMValueRef
 427 lp_build_select_aos(struct lp_build_context *bld,
 428                     unsigned mask,
 429                     LLVMValueRef a,
 430                     LLVMValueRef b,
 431                     unsigned num_channels)
 432 {
 433    LLVMBuilderRef builder = bld->gallivm->builder;
 434    const struct lp_type type = bld->type;
 435    const unsigned n = type.length;
 436    unsigned i, j;
 437
 438    assert((mask & ~0xf) == 0);
 439    assert(lp_check_value(type, a));
 440    assert(lp_check_value(type, b));
 441
 442    if(a == b)
 443       return a;
 444    if((mask & 0xf) == 0xf)
 445       return a;
 446    if((mask & 0xf) == 0x0)
 447       return b;
 448    if(a == bld->undef || b == bld->undef)
 449       return bld->undef;
 450
 451    /*
 452     * There are two major ways of accomplishing this:
 453     * - with a shuffle
 454     * - with a select
 455     *
 456     * The flip between these is empirical and might need to be adjusted.
 457     */
 458    if (n <= 4) {
 459       /*
 460        * Shuffle.
 461        */
 462       LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
 463       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
 464
 465       for(j = 0; j < n; j += num_channels)
 466          for(i = 0; i < num_channels; ++i)
 467             shuffles[j + i] = LLVMConstInt(elem_type,
 468                                            (mask & (1 << i) ? 0 : n) + j + i,
 469                                            0);
 470
 471       return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), "");
 472    }
 473    else {
 474       LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, num_channels);
 475       return lp_build_select(bld, mask_vec, a, b);
 476    }
 477 }
 478
 479
 480 /**
 481  * Return (scalar-cast)val ? true : false;
 482  */
 483 LLVMValueRef
 484 lp_build_any_true_range(struct lp_build_context *bld,
 485                         unsigned real_length,
 486                         LLVMValueRef val)
 487 {
 488    LLVMBuilderRef builder = bld->gallivm->builder;
 489    LLVMTypeRef scalar_type;
 490    LLVMTypeRef true_type;
 491
 492    assert(real_length <= bld->type.length);
 493
 494    true_type = LLVMIntTypeInContext(bld->gallivm->context,
 495                                     bld->type.width * real_length);
 496    scalar_type = LLVMIntTypeInContext(bld->gallivm->context,
 497                                       bld->type.width * bld->type.length);
 498    val = LLVMBuildBitCast(builder, val, scalar_type, "");
 499    /*
 500     * We're using always native types so we can use intrinsics.
 501     * However, if we don't do per-element calculations, we must ensure
 502     * the excess elements aren't used since they may contain garbage.
 503     */
 504    if (real_length < bld->type.length) {
 505       val = LLVMBuildTrunc(builder, val, true_type, "");
 506    }
 507    return LLVMBuildICmp(builder, LLVMIntNE,
 508                         val, LLVMConstNull(true_type), "");
 509 }