src/gallium/auxiliary/gallivm/lp_bld_swizzle.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * Helper functions for swizzling/shuffling.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  */
  34
  35 #include <inttypes.h>  /* for PRIx64 macro */
  36 #include "util/u_debug.h"
  37
  38 #include "lp_bld_type.h"
  39 #include "lp_bld_const.h"
  40 #include "lp_bld_init.h"
  41 #include "lp_bld_logic.h"
  42 #include "lp_bld_swizzle.h"
  43 #include "lp_bld_pack.h"
  44
  45
  46 LLVMValueRef
  47 lp_build_broadcast(struct gallivm_state *gallivm,
  48                    LLVMTypeRef vec_type,
  49                    LLVMValueRef scalar)
  50 {
  51    LLVMValueRef res;
  52
  53    if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) {
  54       /* scalar */
  55       assert(vec_type == LLVMTypeOf(scalar));
  56       res = scalar;
  57    } else {
  58       LLVMBuilderRef builder = gallivm->builder;
  59       const unsigned length = LLVMGetVectorSize(vec_type);
  60       LLVMValueRef undef = LLVMGetUndef(vec_type);
  61       LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context);
  62
  63       assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar));
  64
  65       if (HAVE_LLVM >= 0x207) {
  66          /* The shuffle vector is always made of int32 elements */
  67          LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);
  68          res = LLVMBuildInsertElement(builder, undef, scalar, LLVMConstNull(i32_type), "");
  69          res = LLVMBuildShuffleVector(builder, res, undef, LLVMConstNull(i32_vec_type), "");
  70       } else {
  71          /* XXX: The above path provokes a bug in LLVM 2.6 */
  72          unsigned i;
  73          res = undef;
  74          for(i = 0; i < length; ++i) {
  75             LLVMValueRef index = lp_build_const_int32(gallivm, i);
  76             res = LLVMBuildInsertElement(builder, res, scalar, index, "");
  77          }
  78       }
  79    }
  80
  81    return res;
  82 }
  83
  84
  85 /**
  86  * Broadcast
  87  */
  88 LLVMValueRef
  89 lp_build_broadcast_scalar(struct lp_build_context *bld,
  90                           LLVMValueRef scalar)
  91 {
  92    assert(lp_check_elem_type(bld->type, LLVMTypeOf(scalar)));
  93
  94    return lp_build_broadcast(bld->gallivm, bld->vec_type, scalar);
  95 }
  96
  97
  98 /**
  99  * Combined extract and broadcast (mere shuffle in most cases)
 100  */
 101 LLVMValueRef
 102 lp_build_extract_broadcast(struct gallivm_state *gallivm,
 103                            struct lp_type src_type,
 104                            struct lp_type dst_type,
 105                            LLVMValueRef vector,
 106                            LLVMValueRef index)
 107 {
 108    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
 109    LLVMValueRef res;
 110
 111    assert(src_type.floating == dst_type.floating);
 112    assert(src_type.width    == dst_type.width);
 113
 114    assert(lp_check_value(src_type, vector));
 115    assert(LLVMTypeOf(index) == i32t);
 116
 117    if (src_type.length == 1) {
 118       if (dst_type.length == 1) {
 119          /*
 120           * Trivial scalar -> scalar.
 121           */
 122
 123          res = vector;
 124       }
 125       else {
 126          /*
 127           * Broadcast scalar -> vector.
 128           */
 129
 130          res = lp_build_broadcast(gallivm,
 131                                   lp_build_vec_type(gallivm, dst_type),
 132                                   vector);
 133       }
 134    }
 135    else {
 136       if (dst_type.length > 1) {
 137          /*
 138           * shuffle - result can be of different length.
 139           */
 140
 141          LLVMValueRef shuffle;
 142          shuffle = lp_build_broadcast(gallivm,
 143                                       LLVMVectorType(i32t, dst_type.length),
 144                                       index);
 145          res = LLVMBuildShuffleVector(gallivm->builder, vector,
 146                                       LLVMGetUndef(lp_build_vec_type(gallivm, src_type)),
 147                                       shuffle, "");
 148       }
 149       else {
 150          /*
 151           * Trivial extract scalar from vector.
 152           */
 153           res = LLVMBuildExtractElement(gallivm->builder, vector, index, "");
 154       }
 155    }
 156
 157    return res;
 158 }
 159
 160
 161 /**
 162  * Swizzle one channel into other channels.
 163  */
 164 LLVMValueRef
 165 lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
 166                             LLVMValueRef a,
 167                             unsigned channel,
 168                             unsigned num_channels)
 169 {
 170    LLVMBuilderRef builder = bld->gallivm->builder;
 171    const struct lp_type type = bld->type;
 172    const unsigned n = type.length;
 173    unsigned i, j;
 174
 175    if(a == bld->undef || a == bld->zero || a == bld->one || num_channels == 1)
 176       return a;
 177
 178    assert(num_channels == 2 || num_channels == 4);
 179
 180    /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
 181     * using shuffles here actually causes worst results. More investigation is
 182     * needed. */
 183    if (type.width >= 16) {
 184       /*
 185        * Shuffle.
 186        */
 187       LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
 188       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
 189
 190       for(j = 0; j < n; j += num_channels)
 191          for(i = 0; i < num_channels; ++i)
 192             shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);
 193
 194       return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
 195    }
 196    else if (num_channels == 2) {
 197       /*
 198        * Bit mask and shifts
 199        *
 200        *   XY XY .... XY  <= input
 201        *   0Y 0Y .... 0Y
 202        *   YY YY .... YY
 203        *   YY YY .... YY  <= output
 204        */
 205       struct lp_type type2;
 206       LLVMValueRef tmp = NULL;
 207       int shift;
 208
 209       a = LLVMBuildAnd(builder, a,
 210                        lp_build_const_mask_aos(bld->gallivm,
 211                                                type, 1 << channel, num_channels), "");
 212
 213       type2 = type;
 214       type2.floating = FALSE;
 215       type2.width *= 2;
 216       type2.length /= 2;
 217
 218       a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type2), "");
 219
 220 #ifdef PIPE_ARCH_LITTLE_ENDIAN
 221       shift = channel == 0 ? 1 : -1;
 222 #else
 223       shift = channel == 0 ? -1 : 1;
 224 #endif
 225
 226       if (shift > 0) {
 227          tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type2, shift * type.width), "");
 228       } else if (shift < 0) {
 229          tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type2, -shift * type.width), "");
 230       }
 231
 232       assert(tmp);
 233       if (tmp) {
 234          a = LLVMBuildOr(builder, a, tmp, "");
 235       }
 236
 237       return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
 238    }
 239    else {
 240       /*
 241        * Bit mask and recursive shifts
 242        *
 243        *   XYZW XYZW .... XYZW  <= input
 244        *   0Y00 0Y00 .... 0Y00
 245        *   YY00 YY00 .... YY00
 246        *   YYYY YYYY .... YYYY  <= output
 247        */
 248       struct lp_type type4;
 249       const int shifts[4][2] = {
 250          { 1,  2},
 251          {-1,  2},
 252          { 1, -2},
 253          {-1, -2}
 254       };
 255       unsigned i;
 256
 257       a = LLVMBuildAnd(builder, a,
 258                        lp_build_const_mask_aos(bld->gallivm,
 259                                                type, 1 << channel, 4), "");
 260
 261       /*
 262        * Build a type where each element is an integer that cover the four
 263        * channels.
 264        */
 265
 266       type4 = type;
 267       type4.floating = FALSE;
 268       type4.width *= 4;
 269       type4.length /= 4;
 270
 271       a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
 272
 273       for(i = 0; i < 2; ++i) {
 274          LLVMValueRef tmp = NULL;
 275          int shift = shifts[channel][i];
 276
 277 #ifdef PIPE_ARCH_LITTLE_ENDIAN
 278          shift = -shift;
 279 #endif
 280
 281          if(shift > 0)
 282             tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
 283          if(shift < 0)
 284             tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
 285
 286          assert(tmp);
 287          if(tmp)
 288             a = LLVMBuildOr(builder, a, tmp, "");
 289       }
 290
 291       return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
 292    }
 293 }
 294
 295
 296 /**
 297  * Swizzle a vector consisting of an array of XYZW structs.
 298  *
 299  * This fills a vector of dst_len length with the swizzled channels from src.
 300  *
 301  * e.g. with swizzles = { 2, 1, 0 } and swizzle_count = 6 results in
 302  *      RGBA RGBA = BGR BGR BG
 303  *
 304  * @param swizzles        the swizzle array
 305  * @param num_swizzles    the number of elements in swizzles
 306  * @param dst_len         the length of the result
 307  */
 308 LLVMValueRef
 309 lp_build_swizzle_aos_n(struct gallivm_state* gallivm,
 310                        LLVMValueRef src,
 311                        const unsigned char* swizzles,
 312                        unsigned num_swizzles,
 313                        unsigned dst_len)
 314 {
 315    LLVMBuilderRef builder = gallivm->builder;
 316    LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH];
 317    unsigned i;
 318
 319    assert(dst_len < LP_MAX_VECTOR_WIDTH);
 320
 321    for (i = 0; i < dst_len; ++i) {
 322       int swizzle = swizzles[i % num_swizzles];
 323
 324       if (swizzle == LP_BLD_SWIZZLE_DONTCARE) {
 325          shuffles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
 326       } else {
 327          shuffles[i] = lp_build_const_int32(gallivm, swizzle);
 328       }
 329    }
 330
 331    return LLVMBuildShuffleVector(builder, src, LLVMGetUndef(LLVMTypeOf(src)), LLVMConstVector(shuffles, dst_len), "");
 332 }
 333
 334
 335 LLVMValueRef
 336 lp_build_swizzle_aos(struct lp_build_context *bld,
 337                      LLVMValueRef a,
 338                      const unsigned char swizzles[4])
 339 {
 340    LLVMBuilderRef builder = bld->gallivm->builder;
 341    const struct lp_type type = bld->type;
 342    const unsigned n = type.length;
 343    unsigned i, j;
 344
 345    if (swizzles[0] == PIPE_SWIZZLE_RED &&
 346        swizzles[1] == PIPE_SWIZZLE_GREEN &&
 347        swizzles[2] == PIPE_SWIZZLE_BLUE &&
 348        swizzles[3] == PIPE_SWIZZLE_ALPHA) {
 349       return a;
 350    }
 351
 352    if (swizzles[0] == swizzles[1] &&
 353        swizzles[1] == swizzles[2] &&
 354        swizzles[2] == swizzles[3]) {
 355       switch (swizzles[0]) {
 356       case PIPE_SWIZZLE_RED:
 357       case PIPE_SWIZZLE_GREEN:
 358       case PIPE_SWIZZLE_BLUE:
 359       case PIPE_SWIZZLE_ALPHA:
 360          return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4);
 361       case PIPE_SWIZZLE_ZERO:
 362          return bld->zero;
 363       case PIPE_SWIZZLE_ONE:
 364          return bld->one;
 365       case LP_BLD_SWIZZLE_DONTCARE:
 366          return bld->undef;
 367       default:
 368          assert(0);
 369          return bld->undef;
 370       }
 371    }
 372
 373    if (type.width >= 16) {
 374       /*
 375        * Shuffle.
 376        */
 377       LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type));
 378       LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
 379       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
 380       LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];
 381
 382       memset(aux, 0, sizeof aux);
 383
 384       for(j = 0; j < n; j += 4) {
 385          for(i = 0; i < 4; ++i) {
 386             unsigned shuffle;
 387             switch (swizzles[i]) {
 388             default:
 389                assert(0);
 390                /* fall through */
 391             case PIPE_SWIZZLE_RED:
 392             case PIPE_SWIZZLE_GREEN:
 393             case PIPE_SWIZZLE_BLUE:
 394             case PIPE_SWIZZLE_ALPHA:
 395                shuffle = j + swizzles[i];
 396                shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
 397                break;
 398             case PIPE_SWIZZLE_ZERO:
 399                shuffle = type.length + 0;
 400                shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
 401                if (!aux[0]) {
 402                   aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0);
 403                }
 404                break;
 405             case PIPE_SWIZZLE_ONE:
 406                shuffle = type.length + 1;
 407                shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
 408                if (!aux[1]) {
 409                   aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0);
 410                }
 411                break;
 412             case LP_BLD_SWIZZLE_DONTCARE:
 413                shuffles[j + i] = LLVMGetUndef(i32t);
 414                break;
 415             }
 416          }
 417       }
 418
 419       for (i = 0; i < n; ++i) {
 420          if (!aux[i]) {
 421             aux[i] = undef;
 422          }
 423       }
 424
 425       return LLVMBuildShuffleVector(builder, a,
 426                                     LLVMConstVector(aux, n),
 427                                     LLVMConstVector(shuffles, n), "");
 428    } else {
 429       /*
 430        * Bit mask and shifts.
 431        *
 432        * For example, this will convert BGRA to RGBA by doing
 433        *
 434        *   rgba = (bgra & 0x00ff0000) >> 16
 435        *        | (bgra & 0xff00ff00)
 436        *        | (bgra & 0x000000ff) << 16
 437        *
 438        * This is necessary not only for faster cause, but because X86 backend
 439        * will refuse shuffles of <4 x i8> vectors
 440        */
 441       LLVMValueRef res;
 442       struct lp_type type4;
 443       unsigned cond = 0;
 444       unsigned chan;
 445       int shift;
 446
 447       /*
 448        * Start with a mixture of 1 and 0.
 449        */
 450       for (chan = 0; chan < 4; ++chan) {
 451          if (swizzles[chan] == PIPE_SWIZZLE_ONE) {
 452             cond |= 1 << chan;
 453          }
 454       }
 455       res = lp_build_select_aos(bld, cond, bld->one, bld->zero, 4);
 456
 457       /*
 458        * Build a type where each element is an integer that cover the four
 459        * channels.
 460        */
 461       type4 = type;
 462       type4.floating = FALSE;
 463       type4.width *= 4;
 464       type4.length /= 4;
 465
 466       a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
 467       res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), "");
 468
 469       /*
 470        * Mask and shift the channels, trying to group as many channels in the
 471        * same shift as possible
 472        */
 473       for (shift = -3; shift <= 3; ++shift) {
 474          uint64_t mask = 0;
 475
 476          assert(type4.width <= sizeof(mask)*8);
 477
 478          for (chan = 0; chan < 4; ++chan) {
 479             /* FIXME: big endian */
 480             if (swizzles[chan] < 4 &&
 481                 chan - swizzles[chan] == shift) {
 482                mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
 483             }
 484          }
 485
 486          if (mask) {
 487             LLVMValueRef masked;
 488             LLVMValueRef shifted;
 489             if (0)
 490                debug_printf("shift = %i, mask = %" PRIx64 "\n", shift, mask);
 491
 492             masked = LLVMBuildAnd(builder, a,
 493                                   lp_build_const_int_vec(bld->gallivm, type4, mask), "");
 494             if (shift > 0) {
 495                shifted = LLVMBuildShl(builder, masked,
 496                                       lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
 497             } else if (shift < 0) {
 498                shifted = LLVMBuildLShr(builder, masked,
 499                                        lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
 500             } else {
 501                shifted = masked;
 502             }
 503
 504             res = LLVMBuildOr(builder, res, shifted, "");
 505          }
 506       }
 507
 508       return LLVMBuildBitCast(builder, res,
 509                               lp_build_vec_type(bld->gallivm, type), "");
 510    }
 511 }
 512
 513
 514 /**
 515  * Extended swizzle of a single channel of a SoA vector.
 516  *
 517  * @param bld         building context
 518  * @param unswizzled  array with the 4 unswizzled values
 519  * @param swizzle     one of the PIPE_SWIZZLE_*
 520  *
 521  * @return  the swizzled value.
 522  */
 523 LLVMValueRef
 524 lp_build_swizzle_soa_channel(struct lp_build_context *bld,
 525                              const LLVMValueRef *unswizzled,
 526                              unsigned swizzle)
 527 {
 528    switch (swizzle) {
 529    case PIPE_SWIZZLE_RED:
 530    case PIPE_SWIZZLE_GREEN:
 531    case PIPE_SWIZZLE_BLUE:
 532    case PIPE_SWIZZLE_ALPHA:
 533       return unswizzled[swizzle];
 534    case PIPE_SWIZZLE_ZERO:
 535       return bld->zero;
 536    case PIPE_SWIZZLE_ONE:
 537       return bld->one;
 538    default:
 539       assert(0);
 540       return bld->undef;
 541    }
 542 }
 543
 544
 545 /**
 546  * Extended swizzle of a SoA vector.
 547  *
 548  * @param bld         building context
 549  * @param unswizzled  array with the 4 unswizzled values
 550  * @param swizzles    array of PIPE_SWIZZLE_*
 551  * @param swizzled    output swizzled values
 552  */
 553 void
 554 lp_build_swizzle_soa(struct lp_build_context *bld,
 555                      const LLVMValueRef *unswizzled,
 556                      const unsigned char swizzles[4],
 557                      LLVMValueRef *swizzled)
 558 {
 559    unsigned chan;
 560
 561    for (chan = 0; chan < 4; ++chan) {
 562       swizzled[chan] = lp_build_swizzle_soa_channel(bld, unswizzled,
 563                                                     swizzles[chan]);
 564    }
 565 }
 566
 567
 568 /**
 569  * Do an extended swizzle of a SoA vector inplace.
 570  *
 571  * @param bld         building context
 572  * @param values      intput/output array with the 4 values
 573  * @param swizzles    array of PIPE_SWIZZLE_*
 574  */
 575 void
 576 lp_build_swizzle_soa_inplace(struct lp_build_context *bld,
 577                              LLVMValueRef *values,
 578                              const unsigned char swizzles[4])
 579 {
 580    LLVMValueRef unswizzled[4];
 581    unsigned chan;
 582
 583    for (chan = 0; chan < 4; ++chan) {
 584       unswizzled[chan] = values[chan];
 585    }
 586
 587    lp_build_swizzle_soa(bld, unswizzled, swizzles, values);
 588 }
 589
 590
 591 /**
 592  * Transpose from AOS <-> SOA
 593  *
 594  * @param single_type_lp   type of pixels
 595  * @param src              the 4 * n pixel input
 596  * @param dst              the 4 * n pixel output
 597  */
 598 void
 599 lp_build_transpose_aos(struct gallivm_state *gallivm,
 600                        struct lp_type single_type_lp,
 601                        const LLVMValueRef src[4],
 602                        LLVMValueRef dst[4])
 603 {
 604    struct lp_type double_type_lp = single_type_lp;
 605    LLVMTypeRef single_type;
 606    LLVMTypeRef double_type;
 607    LLVMValueRef t0, t1, t2, t3;
 608
 609    double_type_lp.length >>= 1;
 610    double_type_lp.width  <<= 1;
 611
 612    double_type = lp_build_vec_type(gallivm, double_type_lp);
 613    single_type = lp_build_vec_type(gallivm, single_type_lp);
 614
 615    /* Interleave x, y, z, w -> xy and zw */
 616    t0 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 0);
 617    t1 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 0);
 618    t2 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 1);
 619    t3 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 1);
 620
 621    /* Cast to double width type for second interleave */
 622    t0 = LLVMBuildBitCast(gallivm->builder, t0, double_type, "t0");
 623    t1 = LLVMBuildBitCast(gallivm->builder, t1, double_type, "t1");
 624    t2 = LLVMBuildBitCast(gallivm->builder, t2, double_type, "t2");
 625    t3 = LLVMBuildBitCast(gallivm->builder, t3, double_type, "t3");
 626
 627    /* Interleave xy, zw -> xyzw */
 628    dst[0] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 0);
 629    dst[1] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 1);
 630    dst[2] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 0);
 631    dst[3] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 1);
 632
 633    /* Cast back to original single width type */
 634    dst[0] = LLVMBuildBitCast(gallivm->builder, dst[0], single_type, "dst0");
 635    dst[1] = LLVMBuildBitCast(gallivm->builder, dst[1], single_type, "dst1");
 636    dst[2] = LLVMBuildBitCast(gallivm->builder, dst[2], single_type, "dst2");
 637    dst[3] = LLVMBuildBitCast(gallivm->builder, dst[3], single_type, "dst3");
 638 }
 639
 640
 641 /**
 642  * Transpose from AOS <-> SOA for num_srcs
 643  */
 644 void
 645 lp_build_transpose_aos_n(struct gallivm_state *gallivm,
 646                          struct lp_type type,
 647                          const LLVMValueRef* src,
 648                          unsigned num_srcs,
 649                          LLVMValueRef* dst)
 650 {
 651    switch (num_srcs) {
 652       case 1:
 653          dst[0] = src[0];
 654          break;
 655
 656       case 2:
 657       {
 658          /* Note: we must use a temporary incase src == dst */
 659          LLVMValueRef lo, hi;
 660
 661          lo = lp_build_interleave2_half(gallivm, type, src[0], src[1], 0);
 662          hi = lp_build_interleave2_half(gallivm, type, src[0], src[1], 1);
 663
 664          dst[0] = lo;
 665          dst[1] = hi;
 666          break;
 667       }
 668
 669       case 4:
 670          lp_build_transpose_aos(gallivm, type, src, dst);
 671          break;
 672
 673       default:
 674          assert(0);
 675    };
 676 }
 677
 678
 679 /**
 680  * Pack n-th element of aos values,
 681  * pad out to destination size.
 682  * i.e. x1 y1 _ _ x2 y2 _ _ will become x1 x2 _ _
 683  */
 684 LLVMValueRef
 685 lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
 686                           struct lp_type src_type,
 687                           struct lp_type dst_type,
 688                           const LLVMValueRef src,
 689                           unsigned channel)
 690 {
 691    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
 692    LLVMValueRef undef = LLVMGetUndef(i32t);
 693    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
 694    unsigned num_src = src_type.length / 4;
 695    unsigned num_dst = dst_type.length;
 696    unsigned i;
 697
 698    assert(num_src <= num_dst);
 699
 700    for (i = 0; i < num_src; i++) {
 701       shuffles[i] = LLVMConstInt(i32t, i * 4 + channel, 0);
 702    }
 703    for (i = num_src; i < num_dst; i++) {
 704       shuffles[i] = undef;
 705    }
 706
 707    if (num_dst == 1) {
 708       return LLVMBuildExtractElement(gallivm->builder, src, shuffles[0], "");
 709    }
 710    else {
 711       return LLVMBuildShuffleVector(gallivm->builder, src, src,
 712                                     LLVMConstVector(shuffles, num_dst), "");
 713    }
 714 }
 715
 716
 717 /**
 718  * Unpack and broadcast packed aos values consisting of only the
 719  * first value, i.e. x1 x2 _ _ will become x1 x1 x1 x1 x2 x2 x2 x2
 720  */
 721 LLVMValueRef
 722 lp_build_unpack_broadcast_aos_scalars(struct gallivm_state *gallivm,
 723                                       struct lp_type src_type,
 724                                       struct lp_type dst_type,
 725                                       const LLVMValueRef src)
 726 {
 727    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
 728    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
 729    unsigned num_dst = dst_type.length;
 730    unsigned num_src = dst_type.length / 4;
 731    unsigned i;
 732
 733    assert(num_dst / 4 <= src_type.length);
 734
 735    for (i = 0; i < num_src; i++) {
 736       shuffles[i*4] = LLVMConstInt(i32t, i, 0);
 737       shuffles[i*4+1] = LLVMConstInt(i32t, i, 0);
 738       shuffles[i*4+2] = LLVMConstInt(i32t, i, 0);
 739       shuffles[i*4+3] = LLVMConstInt(i32t, i, 0);
 740    }
 741
 742    if (num_src == 1) {
 743       return lp_build_extract_broadcast(gallivm, src_type, dst_type,
 744                                         src, shuffles[0]);
 745    }
 746    else {
 747       return LLVMBuildShuffleVector(gallivm->builder, src, src,
 748                                     LLVMConstVector(shuffles, num_dst), "");
 749    }
 750 }
 751