src/gallium/auxiliary/gallivm/lp_bld_logic.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * Helper functions for logical operations.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  */
  34
  35 #include <llvm/Config/llvm-config.h>
  36
  37 #include "util/u_cpu_detect.h"
  38 #include "util/u_memory.h"
  39 #include "util/u_debug.h"
  40
  41 #include "lp_bld_type.h"
  42 #include "lp_bld_const.h"
  43 #include "lp_bld_swizzle.h"
  44 #include "lp_bld_init.h"
  45 #include "lp_bld_intr.h"
  46 #include "lp_bld_debug.h"
  47 #include "lp_bld_logic.h"
  48
  49
  50 /*
  51  * XXX
  52  *
  53  * Selection with vector conditional like
  54  *
  55  *    select <4 x i1> %C, %A, %B
  56  *
  57  * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is only
  58  * supported on some backends (x86) starting with llvm 3.1.
  59  *
  60  * Expanding the boolean vector to full SIMD register width, as in
  61  *
  62  *    sext <4 x i1> %C to <4 x i32>
  63  *
  64  * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
  65  * it causes assertion failures in LLVM 2.6. It appears to work correctly on
  66  * LLVM 2.7.
  67  */
  68
  69
  70 /**
  71  * Build code to compare two values 'a' and 'b' of 'type' using the given func.
  72  * \param func  one of PIPE_FUNC_x
  73  * If the ordered argument is true the function will use LLVM's ordered
  74  * comparisons, otherwise unordered comparisons will be used.
  75  * The result values will be 0 for false or ~0 for true.
  76  */
  77 static LLVMValueRef
  78 lp_build_compare_ext(struct gallivm_state *gallivm,
  79                      const struct lp_type type,
  80                      unsigned func,
  81                      LLVMValueRef a,
  82                      LLVMValueRef b,
  83                      boolean ordered)
  84 {
  85    LLVMBuilderRef builder = gallivm->builder;
  86    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
  87    LLVMValueRef zeros = LLVMConstNull(int_vec_type);
  88    LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
  89    LLVMValueRef cond;
  90    LLVMValueRef res;
  91
  92    assert(lp_check_value(type, a));
  93    assert(lp_check_value(type, b));
  94
  95    if(func == PIPE_FUNC_NEVER)
  96       return zeros;
  97    if(func == PIPE_FUNC_ALWAYS)
  98       return ones;
  99
 100    assert(func > PIPE_FUNC_NEVER);
 101    assert(func < PIPE_FUNC_ALWAYS);
 102
 103    if(type.floating) {
 104       LLVMRealPredicate op;
 105       switch(func) {
 106       case PIPE_FUNC_EQUAL:
 107          op = ordered ? LLVMRealOEQ : LLVMRealUEQ;
 108          break;
 109       case PIPE_FUNC_NOTEQUAL:
 110          op = ordered ? LLVMRealONE : LLVMRealUNE;
 111          break;
 112       case PIPE_FUNC_LESS:
 113          op = ordered ? LLVMRealOLT : LLVMRealULT;
 114          break;
 115       case PIPE_FUNC_LEQUAL:
 116          op = ordered ? LLVMRealOLE : LLVMRealULE;
 117          break;
 118       case PIPE_FUNC_GREATER:
 119          op = ordered ? LLVMRealOGT : LLVMRealUGT;
 120          break;
 121       case PIPE_FUNC_GEQUAL:
 122          op = ordered ? LLVMRealOGE : LLVMRealUGE;
 123          break;
 124       default:
 125          assert(0);
 126          return lp_build_undef(gallivm, type);
 127       }
 128
 129       cond = LLVMBuildFCmp(builder, op, a, b, "");
 130       res = LLVMBuildSExt(builder, cond, int_vec_type, "");
 131    }
 132    else {
 133       LLVMIntPredicate op;
 134       switch(func) {
 135       case PIPE_FUNC_EQUAL:
 136          op = LLVMIntEQ;
 137          break;
 138       case PIPE_FUNC_NOTEQUAL:
 139          op = LLVMIntNE;
 140          break;
 141       case PIPE_FUNC_LESS:
 142          op = type.sign ? LLVMIntSLT : LLVMIntULT;
 143          break;
 144       case PIPE_FUNC_LEQUAL:
 145          op = type.sign ? LLVMIntSLE : LLVMIntULE;
 146          break;
 147       case PIPE_FUNC_GREATER:
 148          op = type.sign ? LLVMIntSGT : LLVMIntUGT;
 149          break;
 150       case PIPE_FUNC_GEQUAL:
 151          op = type.sign ? LLVMIntSGE : LLVMIntUGE;
 152          break;
 153       default:
 154          assert(0);
 155          return lp_build_undef(gallivm, type);
 156       }
 157
 158       cond = LLVMBuildICmp(builder, op, a, b, "");
 159       res = LLVMBuildSExt(builder, cond, int_vec_type, "");
 160    }
 161
 162    return res;
 163 }
 164
 165 /**
 166  * Build code to compare two values 'a' and 'b' of 'type' using the given func.
 167  * \param func  one of PIPE_FUNC_x
 168  * The result values will be 0 for false or ~0 for true.
 169  */
 170 LLVMValueRef
 171 lp_build_compare(struct gallivm_state *gallivm,
 172                  const struct lp_type type,
 173                  unsigned func,
 174                  LLVMValueRef a,
 175                  LLVMValueRef b)
 176 {
 177    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
 178    LLVMValueRef zeros = LLVMConstNull(int_vec_type);
 179    LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
 180
 181    assert(lp_check_value(type, a));
 182    assert(lp_check_value(type, b));
 183
 184    if(func == PIPE_FUNC_NEVER)
 185       return zeros;
 186    if(func == PIPE_FUNC_ALWAYS)
 187       return ones;
 188
 189    assert(func > PIPE_FUNC_NEVER);
 190    assert(func < PIPE_FUNC_ALWAYS);
 191
 192 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 193    /*
 194     * There are no unsigned integer comparison instructions in SSE.
 195     */
 196
 197    if (!type.floating && !type.sign &&
 198        type.width * type.length == 128 &&
 199        util_cpu_caps.has_sse2 &&
 200        (func == PIPE_FUNC_LESS ||
 201         func == PIPE_FUNC_LEQUAL ||
 202         func == PIPE_FUNC_GREATER ||
 203         func == PIPE_FUNC_GEQUAL) &&
 204        (gallivm_debug & GALLIVM_DEBUG_PERF)) {
 205          debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
 206                       __FUNCTION__, type.length, type.width);
 207    }
 208 #endif
 209
 210    return lp_build_compare_ext(gallivm, type, func, a, b, FALSE);
 211 }
 212
 213 /**
 214  * Build code to compare two values 'a' and 'b' using the given func.
 215  * \param func  one of PIPE_FUNC_x
 216  * If the operands are floating point numbers, the function will use
 217  * ordered comparison which means that it will return true if both
 218  * operands are not a NaN and the specified condition evaluates to true.
 219  * The result values will be 0 for false or ~0 for true.
 220  */
 221 LLVMValueRef
 222 lp_build_cmp_ordered(struct lp_build_context *bld,
 223                      unsigned func,
 224                      LLVMValueRef a,
 225                      LLVMValueRef b)
 226 {
 227    return lp_build_compare_ext(bld->gallivm, bld->type, func, a, b, TRUE);
 228 }
 229
 230 /**
 231  * Build code to compare two values 'a' and 'b' using the given func.
 232  * \param func  one of PIPE_FUNC_x
 233  * If the operands are floating point numbers, the function will use
 234  * unordered comparison which means that it will return true if either
 235  * operand is a NaN or the specified condition evaluates to true.
 236  * The result values will be 0 for false or ~0 for true.
 237  */
 238 LLVMValueRef
 239 lp_build_cmp(struct lp_build_context *bld,
 240              unsigned func,
 241              LLVMValueRef a,
 242              LLVMValueRef b)
 243 {
 244    return lp_build_compare(bld->gallivm, bld->type, func, a, b);
 245 }
 246
 247
 248 /**
 249  * Return (mask & a) | (~mask & b);
 250  */
 251 LLVMValueRef
 252 lp_build_select_bitwise(struct lp_build_context *bld,
 253                         LLVMValueRef mask,
 254                         LLVMValueRef a,
 255                         LLVMValueRef b)
 256 {
 257    LLVMBuilderRef builder = bld->gallivm->builder;
 258    struct lp_type type = bld->type;
 259    LLVMValueRef res;
 260    LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type);
 261
 262    assert(lp_check_value(type, a));
 263    assert(lp_check_value(type, b));
 264
 265    if (a == b) {
 266       return a;
 267    }
 268
 269    if(type.floating) {
 270       a = LLVMBuildBitCast(builder, a, int_vec_type, "");
 271       b = LLVMBuildBitCast(builder, b, int_vec_type, "");
 272    }
 273
 274    if (type.width > 32)
 275       mask = LLVMBuildSExt(builder, mask, int_vec_type, "");
 276    a = LLVMBuildAnd(builder, a, mask, "");
 277
 278    /* This often gets translated to PANDN, but sometimes the NOT is
 279     * pre-computed and stored in another constant. The best strategy depends
 280     * on available registers, so it is not a big deal -- hopefully LLVM does
 281     * the right decision attending the rest of the program.
 282     */
 283    b = LLVMBuildAnd(builder, b, LLVMBuildNot(builder, mask, ""), "");
 284
 285    res = LLVMBuildOr(builder, a, b, "");
 286
 287    if(type.floating) {
 288       LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type);
 289       res = LLVMBuildBitCast(builder, res, vec_type, "");
 290    }
 291
 292    return res;
 293 }
 294
 295
 296 /**
 297  * Return mask ? a : b;
 298  *
 299  * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value
 300  * will yield unpredictable results.
 301  */
 302 LLVMValueRef
 303 lp_build_select(struct lp_build_context *bld,
 304                 LLVMValueRef mask,
 305                 LLVMValueRef a,
 306                 LLVMValueRef b)
 307 {
 308    LLVMBuilderRef builder = bld->gallivm->builder;
 309    LLVMContextRef lc = bld->gallivm->context;
 310    struct lp_type type = bld->type;
 311    LLVMValueRef res;
 312
 313    assert(lp_check_value(type, a));
 314    assert(lp_check_value(type, b));
 315
 316    if(a == b)
 317       return a;
 318
 319    if (type.length == 1) {
 320       mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), "");
 321       res = LLVMBuildSelect(builder, mask, a, b, "");
 322    }
 323    else if (LLVMIsConstant(mask) ||
 324             LLVMGetInstructionOpcode(mask) == LLVMSExt) {
 325       /* Generate a vector select.
 326        *
 327        * Using vector selects should avoid emitting intrinsics hence avoid
 328        * hindering optimization passes, but vector selects weren't properly
 329        * supported yet for a long time, and LLVM will generate poor code when
 330        * the mask is not the result of a comparison.
 331        * XXX: Even if the instruction was an SExt, this may still produce
 332        * terrible code. Try piglit stencil-twoside.
 333        */
 334
 335       /* Convert the mask to a vector of booleans.
 336        *
 337        * XXX: In x86 the mask is controlled by the MSB, so if we shifted the
 338        * mask by `type.width - 1`, LLVM should realize the mask is ready.  Alas
 339        * what really happens is that LLVM will emit two shifts back to back.
 340        */
 341       if (0) {
 342          LLVMValueRef shift = LLVMConstInt(bld->int_elem_type, bld->type.width - 1, 0);
 343          shift = lp_build_broadcast(bld->gallivm, bld->int_vec_type, shift);
 344          mask = LLVMBuildLShr(builder, mask, shift, "");
 345       }
 346       LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
 347       mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");
 348
 349       res = LLVMBuildSelect(builder, mask, a, b, "");
 350    }
 351    else if (((util_cpu_caps.has_sse4_1 &&
 352               type.width * type.length == 128) ||
 353              (util_cpu_caps.has_avx &&
 354               type.width * type.length == 256 && type.width >= 32) ||
 355              (util_cpu_caps.has_avx2 &&
 356               type.width * type.length == 256)) &&
 357             !LLVMIsConstant(a) &&
 358             !LLVMIsConstant(b) &&
 359             !LLVMIsConstant(mask)) {
 360       const char *intrinsic;
 361       LLVMTypeRef arg_type;
 362       LLVMValueRef args[3];
 363
 364       LLVMTypeRef mask_type = LLVMGetElementType(LLVMTypeOf(mask));
 365       if (LLVMGetIntTypeWidth(mask_type) != type.width) {
 366          LLVMTypeRef int_vec_type = LLVMVectorType(LLVMIntTypeInContext(lc, type.width), type.length);
 367          mask = LLVMBuildSExt(builder, mask, int_vec_type, "");
 368       }
 369       /*
 370        *  There's only float blend in AVX but can just cast i32/i64
 371        *  to float.
 372        */
 373       if (type.width * type.length == 256) {
 374          if (type.width == 64) {
 375            intrinsic = "llvm.x86.avx.blendv.pd.256";
 376            arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4);
 377          }
 378          else if (type.width == 32) {
 379             intrinsic = "llvm.x86.avx.blendv.ps.256";
 380             arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8);
 381          } else {
 382             assert(util_cpu_caps.has_avx2);
 383             intrinsic = "llvm.x86.avx2.pblendvb";
 384             arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 32);
 385          }
 386       }
 387       else if (type.floating &&
 388                type.width == 64) {
 389          intrinsic = "llvm.x86.sse41.blendvpd";
 390          arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2);
 391       } else if (type.floating &&
 392                  type.width == 32) {
 393          intrinsic = "llvm.x86.sse41.blendvps";
 394          arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4);
 395       } else {
 396          intrinsic = "llvm.x86.sse41.pblendvb";
 397          arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16);
 398       }
 399
 400       if (arg_type != bld->int_vec_type) {
 401          mask = LLVMBuildBitCast(builder, mask, arg_type, "");
 402       }
 403
 404       if (arg_type != bld->vec_type) {
 405          a = LLVMBuildBitCast(builder, a, arg_type, "");
 406          b = LLVMBuildBitCast(builder, b, arg_type, "");
 407       }
 408
 409       args[0] = b;
 410       args[1] = a;
 411       args[2] = mask;
 412
 413       res = lp_build_intrinsic(builder, intrinsic,
 414                                arg_type, args, ARRAY_SIZE(args), 0);
 415
 416       if (arg_type != bld->vec_type) {
 417          res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
 418       }
 419    }
 420    else {
 421       res = lp_build_select_bitwise(bld, mask, a, b);
 422    }
 423
 424    return res;
 425 }
 426
 427
 428 /**
 429  * Return mask ? a : b;
 430  *
 431  * mask is a TGSI_WRITEMASK_xxx.
 432  */
 433 LLVMValueRef
 434 lp_build_select_aos(struct lp_build_context *bld,
 435                     unsigned mask,
 436                     LLVMValueRef a,
 437                     LLVMValueRef b,
 438                     unsigned num_channels)
 439 {
 440    LLVMBuilderRef builder = bld->gallivm->builder;
 441    const struct lp_type type = bld->type;
 442    const unsigned n = type.length;
 443    unsigned i, j;
 444
 445    assert((mask & ~0xf) == 0);
 446    assert(lp_check_value(type, a));
 447    assert(lp_check_value(type, b));
 448
 449    if(a == b)
 450       return a;
 451    if((mask & 0xf) == 0xf)
 452       return a;
 453    if((mask & 0xf) == 0x0)
 454       return b;
 455    if(a == bld->undef || b == bld->undef)
 456       return bld->undef;
 457
 458    /*
 459     * There are two major ways of accomplishing this:
 460     * - with a shuffle
 461     * - with a select
 462     *
 463     * The flip between these is empirical and might need to be adjusted.
 464     */
 465    if (n <= 4) {
 466       /*
 467        * Shuffle.
 468        */
 469       LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
 470       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
 471
 472       for(j = 0; j < n; j += num_channels)
 473          for(i = 0; i < num_channels; ++i)
 474             shuffles[j + i] = LLVMConstInt(elem_type,
 475                                            (mask & (1 << i) ? 0 : n) + j + i,
 476                                            0);
 477
 478       return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), "");
 479    }
 480    else {
 481       LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, num_channels);
 482       return lp_build_select(bld, mask_vec, a, b);
 483    }
 484 }
 485
 486
 487 /**
 488  * Return (scalar-cast)val ? true : false;
 489  */
 490 LLVMValueRef
 491 lp_build_any_true_range(struct lp_build_context *bld,
 492                         unsigned real_length,
 493                         LLVMValueRef val)
 494 {
 495    LLVMBuilderRef builder = bld->gallivm->builder;
 496    LLVMTypeRef scalar_type;
 497    LLVMTypeRef true_type;
 498
 499    assert(real_length <= bld->type.length);
 500
 501    true_type = LLVMIntTypeInContext(bld->gallivm->context,
 502                                     bld->type.width * real_length);
 503    scalar_type = LLVMIntTypeInContext(bld->gallivm->context,
 504                                       bld->type.width * bld->type.length);
 505    val = LLVMBuildBitCast(builder, val, scalar_type, "");
 506    /*
 507     * We're using always native types so we can use intrinsics.
 508     * However, if we don't do per-element calculations, we must ensure
 509     * the excess elements aren't used since they may contain garbage.
 510     */
 511    if (real_length < bld->type.length) {
 512       val = LLVMBuildTrunc(builder, val, true_type, "");
 513    }
 514    return LLVMBuildICmp(builder, LLVMIntNE,
 515                         val, LLVMConstNull(true_type), "");
 516 }