src/gallium/auxiliary/gallivm/lp_bld_logic.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * Helper functions for logical operations.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  */
  34
  35
  36 #include "util/u_cpu_detect.h"
  37 #include "util/u_debug.h"
  38
  39 #include "lp_bld_type.h"
  40 #include "lp_bld_const.h"
  41 #include "lp_bld_intr.h"
  42 #include "lp_bld_logic.h"
  43
  44
  45 /*
  46  * XXX
  47  *
  48  * Selection with vector conditional like
  49  *
  50  *    select <4 x i1> %C, %A, %B
  51  *
  52  * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not
  53  * supported on any backend.
  54  *
  55  * Expanding the boolean vector to full SIMD register width, as in
  56  *
  57  *    sext <4 x i1> %C to <4 x i32>
  58  *
  59  * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
  60  * it causes assertion failures in LLVM 2.6. It appears to work correctly on
  61  * LLVM 2.7.
  62  */
  63
  64
  65 /**
  66  * Build code to compare two values 'a' and 'b' of 'type' using the given func.
  67  * \param func  one of PIPE_FUNC_x
  68  * The result values will be 0 for false or ~0 for true.
  69  */
  70 LLVMValueRef
  71 lp_build_compare(LLVMBuilderRef builder,
  72                  const struct lp_type type,
  73                  unsigned func,
  74                  LLVMValueRef a,
  75                  LLVMValueRef b)
  76 {
  77    LLVMTypeRef vec_type = lp_build_vec_type(type);
  78    LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
  79    LLVMValueRef zeros = LLVMConstNull(int_vec_type);
  80    LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
  81    LLVMValueRef cond;
  82    LLVMValueRef res;
  83    unsigned i;
  84
  85    assert(func >= PIPE_FUNC_NEVER);
  86    assert(func <= PIPE_FUNC_ALWAYS);
  87
  88    if(func == PIPE_FUNC_NEVER)
  89       return zeros;
  90    if(func == PIPE_FUNC_ALWAYS)
  91       return ones;
  92
  93    /* TODO: optimize the constant case */
  94
  95    /* XXX: It is not clear if we should use the ordered or unordered operators */
  96
  97 #if !defined(HAVE_LLVM) || HAVE_LLVM < 0x0207
  98 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
  99    if(type.width * type.length == 128) {
 100       if(type.floating && util_cpu_caps.has_sse) {
 101          /* float[4] comparison */
 102          LLVMValueRef args[3];
 103          unsigned cc;
 104          boolean swap;
 105
 106          swap = FALSE;
 107          switch(func) {
 108          case PIPE_FUNC_EQUAL:
 109             cc = 0;
 110             break;
 111          case PIPE_FUNC_NOTEQUAL:
 112             cc = 4;
 113             break;
 114          case PIPE_FUNC_LESS:
 115             cc = 1;
 116             break;
 117          case PIPE_FUNC_LEQUAL:
 118             cc = 2;
 119             break;
 120          case PIPE_FUNC_GREATER:
 121             cc = 1;
 122             swap = TRUE;
 123             break;
 124          case PIPE_FUNC_GEQUAL:
 125             cc = 2;
 126             swap = TRUE;
 127             break;
 128          default:
 129             assert(0);
 130             return lp_build_undef(type);
 131          }
 132
 133          if(swap) {
 134             args[0] = b;
 135             args[1] = a;
 136          }
 137          else {
 138             args[0] = a;
 139             args[1] = b;
 140          }
 141
 142          args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0);
 143          res = lp_build_intrinsic(builder,
 144                                   "llvm.x86.sse.cmp.ps",
 145                                   vec_type,
 146                                   args, 3);
 147          res = LLVMBuildBitCast(builder, res, int_vec_type, "");
 148          return res;
 149       }
 150       else if(util_cpu_caps.has_sse2) {
 151          /* int[4] comparison */
 152          static const struct {
 153             unsigned swap:1;
 154             unsigned eq:1;
 155             unsigned gt:1;
 156             unsigned not:1;
 157          } table[] = {
 158             {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */
 159             {1, 0, 1, 0}, /* PIPE_FUNC_LESS */
 160             {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */
 161             {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */
 162             {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */
 163             {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */
 164             {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */
 165             {0, 0, 0, 0}  /* PIPE_FUNC_ALWAYS */
 166          };
 167          const char *pcmpeq;
 168          const char *pcmpgt;
 169          LLVMValueRef args[2];
 170          LLVMValueRef res;
 171
 172          switch (type.width) {
 173          case 8:
 174             pcmpeq = "llvm.x86.sse2.pcmpeq.b";
 175             pcmpgt = "llvm.x86.sse2.pcmpgt.b";
 176             break;
 177          case 16:
 178             pcmpeq = "llvm.x86.sse2.pcmpeq.w";
 179             pcmpgt = "llvm.x86.sse2.pcmpgt.w";
 180             break;
 181          case 32:
 182             pcmpeq = "llvm.x86.sse2.pcmpeq.d";
 183             pcmpgt = "llvm.x86.sse2.pcmpgt.d";
 184             break;
 185          default:
 186             assert(0);
 187             return lp_build_undef(type);
 188          }
 189
 190          /* There are no signed byte and unsigned word/dword comparison
 191           * instructions. So flip the sign bit so that the results match.
 192           */
 193          if(table[func].gt &&
 194             ((type.width == 8 && type.sign) ||
 195              (type.width != 8 && !type.sign))) {
 196             LLVMValueRef msb = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
 197             a = LLVMBuildXor(builder, a, msb, "");
 198             b = LLVMBuildXor(builder, b, msb, "");
 199          }
 200
 201          if(table[func].swap) {
 202             args[0] = b;
 203             args[1] = a;
 204          }
 205          else {
 206             args[0] = a;
 207             args[1] = b;
 208          }
 209
 210          if(table[func].eq)
 211             res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2);
 212          else if (table[func].gt)
 213             res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2);
 214          else
 215             res = LLVMConstNull(vec_type);
 216
 217          if(table[func].not)
 218             res = LLVMBuildNot(builder, res, "");
 219
 220          return res;
 221       }
 222    } /* if (type.width * type.length == 128) */
 223 #endif
 224 #endif /* HAVE_LLVM < 0x0207 */
 225
 226    if(type.floating) {
 227       LLVMRealPredicate op;
 228       switch(func) {
 229       case PIPE_FUNC_NEVER:
 230          op = LLVMRealPredicateFalse;
 231          break;
 232       case PIPE_FUNC_ALWAYS:
 233          op = LLVMRealPredicateTrue;
 234          break;
 235       case PIPE_FUNC_EQUAL:
 236          op = LLVMRealUEQ;
 237          break;
 238       case PIPE_FUNC_NOTEQUAL:
 239          op = LLVMRealUNE;
 240          break;
 241       case PIPE_FUNC_LESS:
 242          op = LLVMRealULT;
 243          break;
 244       case PIPE_FUNC_LEQUAL:
 245          op = LLVMRealULE;
 246          break;
 247       case PIPE_FUNC_GREATER:
 248          op = LLVMRealUGT;
 249          break;
 250       case PIPE_FUNC_GEQUAL:
 251          op = LLVMRealUGE;
 252          break;
 253       default:
 254          assert(0);
 255          return lp_build_undef(type);
 256       }
 257
 258 #if HAVE_LLVM >= 0x0207
 259       cond = LLVMBuildFCmp(builder, op, a, b, "");
 260       res = LLVMBuildSExt(builder, cond, int_vec_type, "");
 261 #else
 262       res = LLVMGetUndef(int_vec_type);
 263       if (type.length == 1) {
 264          res = LLVMBuildFCmp(builder, op, a, b, "");
 265       }
 266       else {
 267          debug_printf("%s: warning: using slow element-wise float"
 268                       " vector comparison\n", __FUNCTION__);
 269          for (i = 0; i < type.length; ++i) {
 270             LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
 271             cond = LLVMBuildFCmp(builder, op,
 272                                  LLVMBuildExtractElement(builder, a, index, ""),
 273                                  LLVMBuildExtractElement(builder, b, index, ""),
 274                                  "");
 275             cond = LLVMBuildSelect(builder, cond,
 276                                    LLVMConstExtractElement(ones, index),
 277                                    LLVMConstExtractElement(zeros, index),
 278                                    "");
 279             res = LLVMBuildInsertElement(builder, res, cond, index, "");
 280          }
 281       }
 282 #endif
 283    }
 284    else {
 285       LLVMIntPredicate op;
 286       switch(func) {
 287       case PIPE_FUNC_EQUAL:
 288          op = LLVMIntEQ;
 289          break;
 290       case PIPE_FUNC_NOTEQUAL:
 291          op = LLVMIntNE;
 292          break;
 293       case PIPE_FUNC_LESS:
 294          op = type.sign ? LLVMIntSLT : LLVMIntULT;
 295          break;
 296       case PIPE_FUNC_LEQUAL:
 297          op = type.sign ? LLVMIntSLE : LLVMIntULE;
 298          break;
 299       case PIPE_FUNC_GREATER:
 300          op = type.sign ? LLVMIntSGT : LLVMIntUGT;
 301          break;
 302       case PIPE_FUNC_GEQUAL:
 303          op = type.sign ? LLVMIntSGE : LLVMIntUGE;
 304          break;
 305       default:
 306          assert(0);
 307          return lp_build_undef(type);
 308       }
 309
 310 #if HAVE_LLVM >= 0x0207
 311       cond = LLVMBuildICmp(builder, op, a, b, "");
 312       res = LLVMBuildSExt(builder, cond, int_vec_type, "");
 313 #else
 314       res = LLVMGetUndef(int_vec_type);
 315       if (type.length == 1) {
 316          res = LLVMBuildICmp(builder, op, a, b, "");
 317       }
 318       else {
 319          debug_printf("%s: warning: using slow element-wise int"
 320                       " vector comparison\n", __FUNCTION__);
 321
 322          for(i = 0; i < type.length; ++i) {
 323             LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
 324             cond = LLVMBuildICmp(builder, op,
 325                                  LLVMBuildExtractElement(builder, a, index, ""),
 326                                  LLVMBuildExtractElement(builder, b, index, ""),
 327                                  "");
 328             cond = LLVMBuildSelect(builder, cond,
 329                                    LLVMConstExtractElement(ones, index),
 330                                    LLVMConstExtractElement(zeros, index),
 331                                    "");
 332             res = LLVMBuildInsertElement(builder, res, cond, index, "");
 333          }
 334       }
 335 #endif
 336    }
 337
 338    return res;
 339 }
 340
 341
 342
 343 /**
 344  * Build code to compare two values 'a' and 'b' using the given func.
 345  * \param func  one of PIPE_FUNC_x
 346  * The result values will be 0 for false or ~0 for true.
 347  */
 348 LLVMValueRef
 349 lp_build_cmp(struct lp_build_context *bld,
 350              unsigned func,
 351              LLVMValueRef a,
 352              LLVMValueRef b)
 353 {
 354    return lp_build_compare(bld->builder, bld->type, func, a, b);
 355 }
 356
 357
 358 /**
 359  * Return mask ? a : b;
 360  */
 361 LLVMValueRef
 362 lp_build_select(struct lp_build_context *bld,
 363                 LLVMValueRef mask,
 364                 LLVMValueRef a,
 365                 LLVMValueRef b)
 366 {
 367    struct lp_type type = bld->type;
 368    LLVMValueRef res;
 369
 370    if(a == b)
 371       return a;
 372
 373    if (type.length == 1) {
 374       res = LLVMBuildSelect(bld->builder, mask, a, b, "");
 375    }
 376    else {
 377       if(type.floating) {
 378          LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
 379          a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
 380          b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
 381       }
 382
 383       a = LLVMBuildAnd(bld->builder, a, mask, "");
 384
 385       /* This often gets translated to PANDN, but sometimes the NOT is
 386        * pre-computed and stored in another constant. The best strategy depends
 387        * on available registers, so it is not a big deal -- hopefully LLVM does
 388        * the right decision attending the rest of the program.
 389        */
 390       b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
 391
 392       res = LLVMBuildOr(bld->builder, a, b, "");
 393
 394       if(type.floating) {
 395          LLVMTypeRef vec_type = lp_build_vec_type(type);
 396          res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
 397       }
 398    }
 399
 400    return res;
 401 }
 402
 403
 404 LLVMValueRef
 405 lp_build_select_aos(struct lp_build_context *bld,
 406                     LLVMValueRef a,
 407                     LLVMValueRef b,
 408                     const boolean cond[4])
 409 {
 410    const struct lp_type type = bld->type;
 411    const unsigned n = type.length;
 412    unsigned i, j;
 413
 414    if(a == b)
 415       return a;
 416    if(cond[0] && cond[1] && cond[2] && cond[3])
 417       return a;
 418    if(!cond[0] && !cond[1] && !cond[2] && !cond[3])
 419       return b;
 420    if(a == bld->undef || b == bld->undef)
 421       return bld->undef;
 422
 423    /*
 424     * There are three major ways of accomplishing this:
 425     * - with a shuffle,
 426     * - with a select,
 427     * - or with a bit mask.
 428     *
 429     * Select isn't supported for vector types yet.
 430     * The flip between these is empirical and might need to be.
 431     */
 432    if (n <= 4) {
 433       /*
 434        * Shuffle.
 435        */
 436       LLVMTypeRef elem_type = LLVMInt32Type();
 437       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
 438
 439       for(j = 0; j < n; j += 4)
 440          for(i = 0; i < 4; ++i)
 441             shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0);
 442
 443       return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
 444    }
 445    else {
 446 #if 0
 447       /* XXX: Unfortunately select of vectors do not work */
 448       /* Use a select */
 449       LLVMTypeRef elem_type = LLVMInt1Type();
 450       LLVMValueRef cond[LP_MAX_VECTOR_LENGTH];
 451
 452       for(j = 0; j < n; j += 4)
 453          for(i = 0; i < 4; ++i)
 454             cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0);
 455
 456       return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, "");
 457 #else
 458       LLVMValueRef mask = lp_build_const_mask_aos(type, cond);
 459       return lp_build_select(bld, mask, a, b);
 460 #endif
 461    }
 462 }
 463
 464 LLVMValueRef
 465 lp_build_alloca(struct lp_build_context *bld)
 466 {
 467    const struct lp_type type = bld->type;
 468
 469    if (type.length > 1) { /*vector*/
 470       return LLVMBuildAlloca(bld->builder, lp_build_vec_type(type), "");
 471    } else { /*scalar*/
 472       return LLVMBuildAlloca(bld->builder, lp_build_elem_type(type), "");
 473    }
 474 }