src/gallium/auxiliary/gallivm/lp_bld_swizzle.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * Helper functions for swizzling/shuffling.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  */
  34
  35
  36 #include "util/u_debug.h"
  37
  38 #include "lp_bld_type.h"
  39 #include "lp_bld_const.h"
  40 #include "lp_bld_logic.h"
  41 #include "lp_bld_swizzle.h"
  42
  43
  44 LLVMValueRef
  45 lp_build_broadcast(LLVMBuilderRef builder,
  46                    LLVMTypeRef vec_type,
  47                    LLVMValueRef scalar)
  48 {
  49    const unsigned n = LLVMGetVectorSize(vec_type);
  50    LLVMValueRef res;
  51    unsigned i;
  52
  53    res = LLVMGetUndef(vec_type);
  54    for(i = 0; i < n; ++i) {
  55       LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
  56       res = LLVMBuildInsertElement(builder, res, scalar, index, "");
  57    }
  58
  59    return res;
  60 }
  61
  62
  63 /**
  64  * Broadcast
  65  */
  66 LLVMValueRef
  67 lp_build_broadcast_scalar(struct lp_build_context *bld,
  68                           LLVMValueRef scalar)
  69 {
  70    const struct lp_type type = bld->type;
  71
  72    assert(lp_check_elem_type(type, LLVMTypeOf(scalar)));
  73
  74    if (type.length == 1) {
  75       return scalar;
  76    }
  77    else {
  78       LLVMValueRef res;
  79 #if HAVE_LLVM >= 0x207
  80       res = LLVMBuildInsertElement(bld->builder, bld->undef, scalar,
  81                                    LLVMConstInt(LLVMInt32Type(), 0, 0), "");
  82       res = LLVMBuildShuffleVector(bld->builder, res, bld->undef,
  83                                    lp_build_const_int_vec(type, 0), "");
  84 #else
  85       /* XXX: The above path provokes a bug in LLVM 2.6 */
  86       unsigned i;
  87       res = bld->undef;
  88       for(i = 0; i < type.length; ++i) {
  89          LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
  90          res = LLVMBuildInsertElement(bld->builder, res, scalar, index, "");
  91       }
  92 #endif
  93       return res;
  94    }
  95 }
  96
  97
  98 LLVMValueRef
  99 lp_build_broadcast_aos(struct lp_build_context *bld,
 100                        LLVMValueRef a,
 101                        unsigned channel)
 102 {
 103    const struct lp_type type = bld->type;
 104    const unsigned n = type.length;
 105    unsigned i, j;
 106
 107    if(a == bld->undef || a == bld->zero || a == bld->one)
 108       return a;
 109
 110    /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
 111     * using shuffles here actually causes worst results. More investigation is
 112     * needed. */
 113    if (type.width >= 16) {
 114       /*
 115        * Shuffle.
 116        */
 117       LLVMTypeRef elem_type = LLVMInt32Type();
 118       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
 119
 120       for(j = 0; j < n; j += 4)
 121          for(i = 0; i < 4; ++i)
 122             shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);
 123
 124       return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
 125    }
 126    else {
 127       /*
 128        * Bit mask and recursive shifts
 129        *
 130        *   XYZW XYZW .... XYZW  <= input
 131        *   0Y00 0Y00 .... 0Y00
 132        *   YY00 YY00 .... YY00
 133        *   YYYY YYYY .... YYYY  <= output
 134        */
 135       struct lp_type type4;
 136       const char shifts[4][2] = {
 137          { 1,  2},
 138          {-1,  2},
 139          { 1, -2},
 140          {-1, -2}
 141       };
 142       unsigned i;
 143
 144       a = LLVMBuildAnd(bld->builder, a,
 145                        lp_build_const_mask_aos(type, 1 << channel), "");
 146
 147       /*
 148        * Build a type where each element is an integer that cover the four
 149        * channels.
 150        */
 151
 152       type4 = type;
 153       type4.floating = FALSE;
 154       type4.width *= 4;
 155       type4.length /= 4;
 156
 157       a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), "");
 158
 159       for(i = 0; i < 2; ++i) {
 160          LLVMValueRef tmp = NULL;
 161          int shift = shifts[channel][i];
 162
 163 #ifdef PIPE_ARCH_LITTLE_ENDIAN
 164          shift = -shift;
 165 #endif
 166
 167          if(shift > 0)
 168             tmp = LLVMBuildLShr(bld->builder, a, lp_build_const_int_vec(type4, shift*type.width), "");
 169          if(shift < 0)
 170             tmp = LLVMBuildShl(bld->builder, a, lp_build_const_int_vec(type4, -shift*type.width), "");
 171
 172          assert(tmp);
 173          if(tmp)
 174             a = LLVMBuildOr(bld->builder, a, tmp, "");
 175       }
 176
 177       return LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type), "");
 178    }
 179 }
 180
 181
 182 LLVMValueRef
 183 lp_build_swizzle_aos(struct lp_build_context *bld,
 184                      LLVMValueRef a,
 185                      const unsigned char swizzles[4])
 186 {
 187    const struct lp_type type = bld->type;
 188    const unsigned n = type.length;
 189    unsigned i, j;
 190
 191    if (swizzles[0] == PIPE_SWIZZLE_RED &&
 192        swizzles[1] == PIPE_SWIZZLE_GREEN &&
 193        swizzles[2] == PIPE_SWIZZLE_BLUE &&
 194        swizzles[3] == PIPE_SWIZZLE_ALPHA) {
 195       return a;
 196    }
 197
 198    if (swizzles[0] == swizzles[1] &&
 199        swizzles[1] == swizzles[2] &&
 200        swizzles[2] == swizzles[3]) {
 201       switch (swizzles[0]) {
 202       case PIPE_SWIZZLE_RED:
 203       case PIPE_SWIZZLE_GREEN:
 204       case PIPE_SWIZZLE_BLUE:
 205       case PIPE_SWIZZLE_ALPHA:
 206          return lp_build_broadcast_aos(bld, a, swizzles[0]);
 207       case PIPE_SWIZZLE_ZERO:
 208          return bld->zero;
 209       case PIPE_SWIZZLE_ONE:
 210          return bld->one;
 211       default:
 212          assert(0);
 213          return bld->undef;
 214       }
 215    }
 216
 217    if (type.width >= 16) {
 218       /*
 219        * Shuffle.
 220        */
 221       LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(type));
 222       LLVMTypeRef i32t = LLVMInt32Type();
 223       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
 224       LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];
 225
 226       memset(aux, 0, sizeof aux);
 227
 228       for(j = 0; j < n; j += 4) {
 229          for(i = 0; i < 4; ++i) {
 230             unsigned shuffle;
 231             switch (swizzles[i]) {
 232             default:
 233                assert(0);
 234                /* fall through */
 235             case PIPE_SWIZZLE_RED:
 236             case PIPE_SWIZZLE_GREEN:
 237             case PIPE_SWIZZLE_BLUE:
 238             case PIPE_SWIZZLE_ALPHA:
 239                shuffle = j + swizzles[i];
 240                break;
 241             case PIPE_SWIZZLE_ZERO:
 242                shuffle = type.length + 0;
 243                if (!aux[0]) {
 244                   aux[0] = lp_build_const_elem(type, 0.0);
 245                }
 246                break;
 247             case PIPE_SWIZZLE_ONE:
 248                shuffle = type.length + 1;
 249                if (!aux[1]) {
 250                   aux[1] = lp_build_const_elem(type, 1.0);
 251                }
 252                break;
 253             }
 254             shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
 255          }
 256       }
 257
 258       for (i = 0; i < n; ++i) {
 259          if (!aux[i]) {
 260             aux[i] = undef;
 261          }
 262       }
 263
 264       return LLVMBuildShuffleVector(bld->builder, a,
 265                                     LLVMConstVector(aux, n),
 266                                     LLVMConstVector(shuffles, n), "");
 267    } else {
 268       /*
 269        * Bit mask and shifts.
 270        *
 271        * For example, this will convert BGRA to RGBA by doing
 272        *
 273        *   rgba = (bgra & 0x00ff0000) >> 16
 274        *        | (bgra & 0xff00ff00)
 275        *        | (bgra & 0x000000ff) << 16
 276        *
 277        * This is necessary not only for faster cause, but because X86 backend
 278        * will refuse shuffles of <4 x i8> vectors
 279        */
 280       LLVMValueRef res;
 281       struct lp_type type4;
 282       unsigned cond = 0;
 283       unsigned chan;
 284       int shift;
 285
 286       /*
 287        * Start with a mixture of 1 and 0.
 288        */
 289       for (chan = 0; chan < 4; ++chan) {
 290          if (swizzles[chan] == PIPE_SWIZZLE_ONE) {
 291             cond |= 1 << chan;
 292          }
 293       }
 294       res = lp_build_select_aos(bld, cond, bld->one, bld->zero);
 295
 296       /*
 297        * Build a type where each element is an integer that cover the four
 298        * channels.
 299        */
 300       type4 = type;
 301       type4.floating = FALSE;
 302       type4.width *= 4;
 303       type4.length /= 4;
 304
 305       a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), "");
 306       res = LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type4), "");
 307
 308       /*
 309        * Mask and shift the channels, trying to group as many channels in the
 310        * same shift as possible
 311        */
 312       for (shift = -3; shift <= 3; ++shift) {
 313          unsigned long long mask = 0;
 314
 315          assert(type4.width <= sizeof(mask)*8);
 316
 317          for (chan = 0; chan < 4; ++chan) {
 318             /* FIXME: big endian */
 319             if (swizzles[chan] < 4 &&
 320                 chan - swizzles[chan] == shift) {
 321                mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
 322             }
 323          }
 324
 325          if (mask) {
 326             LLVMValueRef masked;
 327             LLVMValueRef shifted;
 328
 329             if (0)
 330                debug_printf("shift = %i, mask = 0x%08llx\n", shift, mask);
 331
 332             masked = LLVMBuildAnd(bld->builder, a,
 333                                   lp_build_const_int_vec(type4, mask), "");
 334             if (shift > 0) {
 335                shifted = LLVMBuildShl(bld->builder, masked,
 336                                       lp_build_const_int_vec(type4, shift*type.width), "");
 337             } else if (shift < 0) {
 338                shifted = LLVMBuildLShr(bld->builder, masked,
 339                                        lp_build_const_int_vec(type4, -shift*type.width), "");
 340             } else {
 341                shifted = masked;
 342             }
 343
 344             res = LLVMBuildOr(bld->builder, res, shifted, "");
 345          }
 346       }
 347
 348       return LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type), "");
 349    }
 350 }
 351
 352
 353 /**
 354  * Extended swizzle of a single channel of a SoA vector.
 355  *
 356  * @param bld         building context
 357  * @param unswizzled  array with the 4 unswizzled values
 358  * @param swizzle     one of the PIPE_SWIZZLE_*
 359  *
 360  * @return  the swizzled value.
 361  */
 362 LLVMValueRef
 363 lp_build_swizzle_soa_channel(struct lp_build_context *bld,
 364                              const LLVMValueRef *unswizzled,
 365                              unsigned swizzle)
 366 {
 367    switch (swizzle) {
 368    case PIPE_SWIZZLE_RED:
 369    case PIPE_SWIZZLE_GREEN:
 370    case PIPE_SWIZZLE_BLUE:
 371    case PIPE_SWIZZLE_ALPHA:
 372       return unswizzled[swizzle];
 373    case PIPE_SWIZZLE_ZERO:
 374       return bld->zero;
 375    case PIPE_SWIZZLE_ONE:
 376       return bld->one;
 377    default:
 378       assert(0);
 379       return bld->undef;
 380    }
 381 }
 382
 383
 384 /**
 385  * Extended swizzle of a SoA vector.
 386  *
 387  * @param bld         building context
 388  * @param unswizzled  array with the 4 unswizzled values
 389  * @param swizzles    array of PIPE_SWIZZLE_*
 390  * @param swizzled    output swizzled values
 391  */
 392 void
 393 lp_build_swizzle_soa(struct lp_build_context *bld,
 394                      const LLVMValueRef *unswizzled,
 395                      const unsigned char swizzles[4],
 396                      LLVMValueRef *swizzled)
 397 {
 398    unsigned chan;
 399
 400    for (chan = 0; chan < 4; ++chan) {
 401       swizzled[chan] = lp_build_swizzle_soa_channel(bld, unswizzled,
 402                                                     swizzles[chan]);
 403    }
 404 }
 405
 406
 407 /**
 408  * Do an extended swizzle of a SoA vector inplace.
 409  *
 410  * @param bld         building context
 411  * @param values      intput/output array with the 4 values
 412  * @param swizzles    array of PIPE_SWIZZLE_*
 413  */
 414 void
 415 lp_build_swizzle_soa_inplace(struct lp_build_context *bld,
 416                              LLVMValueRef *values,
 417                              const unsigned char swizzles[4])
 418 {
 419    LLVMValueRef unswizzled[4];
 420    unsigned chan;
 421
 422    for (chan = 0; chan < 4; ++chan) {
 423       unswizzled[chan] = values[chan];
 424    }
 425
 426    lp_build_swizzle_soa(bld, unswizzled, swizzles, values);
 427 }