src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2010 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * Texture sampling -- AoS.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  * @author Brian Paul <brianp@vmware.com>
  34  */
  35
  36 #include "pipe/p_defines.h"
  37 #include "pipe/p_state.h"
  38 #include "util/u_debug.h"
  39 #include "util/u_dump.h"
  40 #include "util/u_memory.h"
  41 #include "util/u_math.h"
  42 #include "util/u_format.h"
  43 #include "lp_bld_debug.h"
  44 #include "lp_bld_type.h"
  45 #include "lp_bld_const.h"
  46 #include "lp_bld_conv.h"
  47 #include "lp_bld_arit.h"
  48 #include "lp_bld_bitarit.h"
  49 #include "lp_bld_logic.h"
  50 #include "lp_bld_swizzle.h"
  51 #include "lp_bld_pack.h"
  52 #include "lp_bld_flow.h"
  53 #include "lp_bld_gather.h"
  54 #include "lp_bld_format.h"
  55 #include "lp_bld_init.h"
  56 #include "lp_bld_sample.h"
  57 #include "lp_bld_sample_aos.h"
  58 #include "lp_bld_quad.h"
  59
  60
  61 /**
  62  * Build LLVM code for texture coord wrapping, for nearest filtering,
  63  * for scaled integer texcoords.
  64  * \param block_length  is the length of the pixel block along the
  65  *                      coordinate axis
  66  * \param coord  the incoming texcoord (s,t,r or q) scaled to the texture size
  67  * \param length  the texture size along one dimension
  68  * \param stride  pixel stride along the coordinate axis (in bytes)
  69  * \param is_pot  if TRUE, length is a power of two
  70  * \param wrap_mode  one of PIPE_TEX_WRAP_x
  71  * \param out_offset  byte offset for the wrapped coordinate
  72  * \param out_i  resulting sub-block pixel coordinate for coord0
  73  */
  74 static void
  75 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
  76                                  unsigned block_length,
  77                                  LLVMValueRef coord,
  78                                  LLVMValueRef length,
  79                                  LLVMValueRef stride,
  80                                  boolean is_pot,
  81                                  unsigned wrap_mode,
  82                                  LLVMValueRef *out_offset,
  83                                  LLVMValueRef *out_i)
  84 {
  85    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
  86    LLVMBuilderRef builder = bld->gallivm->builder;
  87    LLVMValueRef length_minus_one;
  88
  89    length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
  90
  91    switch(wrap_mode) {
  92    case PIPE_TEX_WRAP_REPEAT:
  93       if(is_pot)
  94          coord = LLVMBuildAnd(builder, coord, length_minus_one, "");
  95       else {
  96          /* Add a bias to the texcoord to handle negative coords */
  97          LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
  98          coord = LLVMBuildAdd(builder, coord, bias, "");
  99          coord = LLVMBuildURem(builder, coord, length, "");
 100       }
 101       break;
 102
 103    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 104       coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
 105       coord = lp_build_min(int_coord_bld, coord, length_minus_one);
 106       break;
 107
 108    case PIPE_TEX_WRAP_CLAMP:
 109    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 110    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 111    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 112    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 113    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 114    default:
 115       assert(0);
 116    }
 117
 118    lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
 119                                   out_offset, out_i);
 120 }
 121
 122
 123 /**
 124  * Build LLVM code for texture coord wrapping, for linear filtering,
 125  * for scaled integer texcoords.
 126  * \param block_length  is the length of the pixel block along the
 127  *                      coordinate axis
 128  * \param coord0  the incoming texcoord (s,t,r or q) scaled to the texture size
 129  * \param length  the texture size along one dimension
 130  * \param stride  pixel stride along the coordinate axis (in bytes)
 131  * \param is_pot  if TRUE, length is a power of two
 132  * \param wrap_mode  one of PIPE_TEX_WRAP_x
 133  * \param offset0  resulting relative offset for coord0
 134  * \param offset1  resulting relative offset for coord0 + 1
 135  * \param i0  resulting sub-block pixel coordinate for coord0
 136  * \param i1  resulting sub-block pixel coordinate for coord0 + 1
 137  */
 138 static void
 139 lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
 140                                 unsigned block_length,
 141                                 LLVMValueRef coord0,
 142                                 LLVMValueRef length,
 143                                 LLVMValueRef stride,
 144                                 boolean is_pot,
 145                                 unsigned wrap_mode,
 146                                 LLVMValueRef *offset0,
 147                                 LLVMValueRef *offset1,
 148                                 LLVMValueRef *i0,
 149                                 LLVMValueRef *i1)
 150 {
 151    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 152    LLVMBuilderRef builder = bld->gallivm->builder;
 153    LLVMValueRef length_minus_one;
 154    LLVMValueRef lmask, umask, mask;
 155
 156    if (block_length != 1) {
 157       /*
 158        * If the pixel block covers more than one pixel then there is no easy
 159        * way to calculate offset1 relative to offset0. Instead, compute them
 160        * independently.
 161        */
 162
 163       LLVMValueRef coord1;
 164
 165       lp_build_sample_wrap_nearest_int(bld,
 166                                        block_length,
 167                                        coord0,
 168                                        length,
 169                                        stride,
 170                                        is_pot,
 171                                        wrap_mode,
 172                                        offset0, i0);
 173
 174       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 175
 176       lp_build_sample_wrap_nearest_int(bld,
 177                                        block_length,
 178                                        coord1,
 179                                        length,
 180                                        stride,
 181                                        is_pot,
 182                                        wrap_mode,
 183                                        offset1, i1);
 184
 185       return;
 186    }
 187
 188    /*
 189     * Scalar pixels -- try to compute offset0 and offset1 with a single stride
 190     * multiplication.
 191     */
 192
 193    *i0 = int_coord_bld->zero;
 194    *i1 = int_coord_bld->zero;
 195
 196    length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
 197
 198    switch(wrap_mode) {
 199    case PIPE_TEX_WRAP_REPEAT:
 200       if (is_pot) {
 201          coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
 202       }
 203       else {
 204          /* Add a bias to the texcoord to handle negative coords */
 205          LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
 206          coord0 = LLVMBuildAdd(builder, coord0, bias, "");
 207          coord0 = LLVMBuildURem(builder, coord0, length, "");
 208       }
 209
 210       mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
 211                               PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
 212
 213       *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
 214       *offset1 = LLVMBuildAnd(builder,
 215                               lp_build_add(int_coord_bld, *offset0, stride),
 216                               mask, "");
 217       break;
 218
 219    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 220       lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
 221                                PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
 222       umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
 223                                PIPE_FUNC_LESS, coord0, length_minus_one);
 224
 225       coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
 226       coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
 227
 228       mask = LLVMBuildAnd(builder, lmask, umask, "");
 229
 230       *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
 231       *offset1 = lp_build_add(int_coord_bld,
 232                               *offset0,
 233                               LLVMBuildAnd(builder, stride, mask, ""));
 234       break;
 235
 236    case PIPE_TEX_WRAP_CLAMP:
 237    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 238    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 239    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 240    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 241    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 242    default:
 243       assert(0);
 244       *offset0 = int_coord_bld->zero;
 245       *offset1 = int_coord_bld->zero;
 246       break;
 247    }
 248 }
 249
 250
 251 /**
 252  * Sample a single texture image with nearest sampling.
 253  * If sampling a cube texture, r = cube face in [0,5].
 254  * Return filtered color as two vectors of 16-bit fixed point values.
 255  */
 256 static void
 257 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
 258                               LLVMValueRef int_size,
 259                               LLVMValueRef row_stride_vec,
 260                               LLVMValueRef img_stride_vec,
 261                               LLVMValueRef data_ptr,
 262                               LLVMValueRef s,
 263                               LLVMValueRef t,
 264                               LLVMValueRef r,
 265                               LLVMValueRef *colors_lo,
 266                               LLVMValueRef *colors_hi)
 267 {
 268    const unsigned dims = bld->dims;
 269    LLVMBuilderRef builder = bld->gallivm->builder;
 270    struct lp_build_context i32, h16, u8n;
 271    LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
 272    LLVMValueRef i32_c8;
 273    LLVMValueRef width_vec, height_vec, depth_vec;
 274    LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL;
 275    LLVMValueRef x_stride;
 276    LLVMValueRef x_offset, offset;
 277    LLVMValueRef x_subcoord, y_subcoord, z_subcoord;
 278
 279    lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32));
 280    lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16));
 281    lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8));
 282
 283    i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);
 284    h16_vec_type = lp_build_vec_type(bld->gallivm, h16.type);
 285    u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
 286
 287    lp_build_extract_image_sizes(bld,
 288                                 bld->int_size_type,
 289                                 bld->int_coord_type,
 290                                 int_size,
 291                                 &width_vec,
 292                                 &height_vec,
 293                                 &depth_vec);
 294
 295    if (bld->static_state->normalized_coords) {
 296       LLVMValueRef scaled_size;
 297       LLVMValueRef flt_size;
 298
 299       /* scale size by 256 (8 fractional bits) */
 300       scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
 301
 302       flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
 303
 304       lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
 305    }
 306    else {
 307       /* scale coords by 256 (8 fractional bits) */
 308       s = lp_build_mul_imm(&bld->coord_bld, s, 256);
 309       if (dims >= 2)
 310          t = lp_build_mul_imm(&bld->coord_bld, t, 256);
 311       if (dims >= 3)
 312          r = lp_build_mul_imm(&bld->coord_bld, r, 256);
 313    }
 314
 315    /* convert float to int */
 316    s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
 317    if (dims >= 2)
 318       t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
 319    if (dims >= 3)
 320       r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
 321
 322    /* compute floor (shift right 8) */
 323    i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
 324    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
 325    if (dims >= 2)
 326       t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
 327    if (dims >= 3)
 328       r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
 329
 330    /* get pixel, row, image strides */
 331    x_stride = lp_build_const_vec(bld->gallivm,
 332                                  bld->int_coord_bld.type,
 333                                  bld->format_desc->block.bits/8);
 334
 335    /* Do texcoord wrapping, compute texel offset */
 336    lp_build_sample_wrap_nearest_int(bld,
 337                                     bld->format_desc->block.width,
 338                                     s_ipart, width_vec, x_stride,
 339                                     bld->static_state->pot_width,
 340                                     bld->static_state->wrap_s,
 341                                     &x_offset, &x_subcoord);
 342    offset = x_offset;
 343    if (dims >= 2) {
 344       LLVMValueRef y_offset;
 345       lp_build_sample_wrap_nearest_int(bld,
 346                                        bld->format_desc->block.height,
 347                                        t_ipart, height_vec, row_stride_vec,
 348                                        bld->static_state->pot_height,
 349                                        bld->static_state->wrap_t,
 350                                        &y_offset, &y_subcoord);
 351       offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
 352       if (dims >= 3) {
 353          LLVMValueRef z_offset;
 354          lp_build_sample_wrap_nearest_int(bld,
 355                                           1, /* block length (depth) */
 356                                           r_ipart, depth_vec, img_stride_vec,
 357                                           bld->static_state->pot_height,
 358                                           bld->static_state->wrap_r,
 359                                           &z_offset, &z_subcoord);
 360          offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
 361       }
 362       else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
 363          LLVMValueRef z_offset;
 364          /* The r coord is the cube face in [0,5] */
 365          z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
 366          offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
 367       }
 368    }
 369
 370    /*
 371     * Fetch the pixels as 4 x 32bit (rgba order might differ):
 372     *
 373     *   rgba0 rgba1 rgba2 rgba3
 374     *
 375     * bit cast them into 16 x u8
 376     *
 377     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
 378     *
 379     * unpack them into two 8 x i16:
 380     *
 381     *   r0 g0 b0 a0 r1 g1 b1 a1
 382     *   r2 g2 b2 a2 r3 g3 b3 a3
 383     *
 384     * The higher 8 bits of the resulting elements will be zero.
 385     */
 386    {
 387       LLVMValueRef rgba8;
 388
 389       if (util_format_is_rgba8_variant(bld->format_desc)) {
 390          /*
 391           * Given the format is a rgba8, just read the pixels as is,
 392           * without any swizzling. Swizzling will be done later.
 393           */
 394          rgba8 = lp_build_gather(bld->gallivm,
 395                                  bld->texel_type.length,
 396                                  bld->format_desc->block.bits,
 397                                  bld->texel_type.width,
 398                                  data_ptr, offset);
 399
 400          rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
 401       }
 402       else {
 403          rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
 404                                          bld->format_desc,
 405                                          u8n.type,
 406                                          data_ptr, offset,
 407                                          x_subcoord,
 408                                          y_subcoord);
 409       }
 410
 411       /* Expand one 4*rgba8 to two 2*rgba16 */
 412       lp_build_unpack2(bld->gallivm, u8n.type, h16.type,
 413                        rgba8,
 414                        colors_lo, colors_hi);
 415    }
 416 }
 417
 418
 419 /**
 420  * Sample a single texture image with (bi-)(tri-)linear sampling.
 421  * Return filtered color as two vectors of 16-bit fixed point values.
 422  */
 423 static void
 424 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
 425                              LLVMValueRef int_size,
 426                              LLVMValueRef row_stride_vec,
 427                              LLVMValueRef img_stride_vec,
 428                              LLVMValueRef data_ptr,
 429                              LLVMValueRef s,
 430                              LLVMValueRef t,
 431                              LLVMValueRef r,
 432                              LLVMValueRef *colors_lo,
 433                              LLVMValueRef *colors_hi)
 434 {
 435    const unsigned dims = bld->dims;
 436    LLVMBuilderRef builder = bld->gallivm->builder;
 437    struct lp_build_context i32, h16, u8n;
 438    LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
 439    LLVMValueRef i32_c8, i32_c128, i32_c255;
 440    LLVMValueRef width_vec, height_vec, depth_vec;
 441    LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
 442    LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_fpart_lo = NULL, t_fpart_hi = NULL;
 443    LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_fpart_lo = NULL, r_fpart_hi = NULL;
 444    LLVMValueRef x_stride, y_stride, z_stride;
 445    LLVMValueRef x_offset0, x_offset1;
 446    LLVMValueRef y_offset0, y_offset1;
 447    LLVMValueRef z_offset0, z_offset1;
 448    LLVMValueRef offset[2][2][2]; /* [z][y][x] */
 449    LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
 450    LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */
 451    LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */
 452    LLVMValueRef packed_lo, packed_hi;
 453    unsigned x, y, z;
 454    unsigned i, j, k;
 455    unsigned numj, numk;
 456
 457    lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32));
 458    lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16));
 459    lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8));
 460
 461    i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);
 462    h16_vec_type = lp_build_vec_type(bld->gallivm, h16.type);
 463    u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
 464
 465    lp_build_extract_image_sizes(bld,
 466                                 bld->int_size_type,
 467                                 bld->int_coord_type,
 468                                 int_size,
 469                                 &width_vec,
 470                                 &height_vec,
 471                                 &depth_vec);
 472
 473    if (bld->static_state->normalized_coords) {
 474       LLVMValueRef scaled_size;
 475       LLVMValueRef flt_size;
 476
 477       /* scale size by 256 (8 fractional bits) */
 478       scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
 479
 480       flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
 481
 482       lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
 483    }
 484    else {
 485       /* scale coords by 256 (8 fractional bits) */
 486       s = lp_build_mul_imm(&bld->coord_bld, s, 256);
 487       if (dims >= 2)
 488          t = lp_build_mul_imm(&bld->coord_bld, t, 256);
 489       if (dims >= 3)
 490          r = lp_build_mul_imm(&bld->coord_bld, r, 256);
 491    }
 492
 493    /* convert float to int */
 494    s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
 495    if (dims >= 2)
 496       t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
 497    if (dims >= 3)
 498       r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
 499
 500    /* subtract 0.5 (add -128) */
 501    i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
 502    s = LLVMBuildAdd(builder, s, i32_c128, "");
 503    if (dims >= 2) {
 504       t = LLVMBuildAdd(builder, t, i32_c128, "");
 505    }
 506    if (dims >= 3) {
 507       r = LLVMBuildAdd(builder, r, i32_c128, "");
 508    }
 509
 510    /* compute floor (shift right 8) */
 511    i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
 512    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
 513    if (dims >= 2)
 514       t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
 515    if (dims >= 3)
 516       r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
 517
 518    /* compute fractional part (AND with 0xff) */
 519    i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
 520    s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
 521    if (dims >= 2)
 522       t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
 523    if (dims >= 3)
 524       r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
 525
 526    /* get pixel, row and image strides */
 527    x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
 528                                  bld->format_desc->block.bits/8);
 529    y_stride = row_stride_vec;
 530    z_stride = img_stride_vec;
 531
 532    /* do texcoord wrapping and compute texel offsets */
 533    lp_build_sample_wrap_linear_int(bld,
 534                                    bld->format_desc->block.width,
 535                                    s_ipart, width_vec, x_stride,
 536                                    bld->static_state->pot_width,
 537                                    bld->static_state->wrap_s,
 538                                    &x_offset0, &x_offset1,
 539                                    &x_subcoord[0], &x_subcoord[1]);
 540    for (z = 0; z < 2; z++) {
 541       for (y = 0; y < 2; y++) {
 542          offset[z][y][0] = x_offset0;
 543          offset[z][y][1] = x_offset1;
 544       }
 545    }
 546
 547    if (dims >= 2) {
 548       lp_build_sample_wrap_linear_int(bld,
 549                                       bld->format_desc->block.height,
 550                                       t_ipart, height_vec, y_stride,
 551                                       bld->static_state->pot_height,
 552                                       bld->static_state->wrap_t,
 553                                       &y_offset0, &y_offset1,
 554                                       &y_subcoord[0], &y_subcoord[1]);
 555
 556       for (z = 0; z < 2; z++) {
 557          for (x = 0; x < 2; x++) {
 558             offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
 559                                            offset[z][0][x], y_offset0);
 560             offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
 561                                            offset[z][1][x], y_offset1);
 562          }
 563       }
 564    }
 565
 566    if (dims >= 3) {
 567       lp_build_sample_wrap_linear_int(bld,
 568                                       bld->format_desc->block.height,
 569                                       r_ipart, depth_vec, z_stride,
 570                                       bld->static_state->pot_depth,
 571                                       bld->static_state->wrap_r,
 572                                       &z_offset0, &z_offset1,
 573                                       &z_subcoord[0], &z_subcoord[1]);
 574       for (y = 0; y < 2; y++) {
 575          for (x = 0; x < 2; x++) {
 576             offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
 577                                            offset[0][y][x], z_offset0);
 578             offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
 579                                            offset[1][y][x], z_offset1);
 580          }
 581       }
 582    }
 583    else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
 584       LLVMValueRef z_offset;
 585       z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
 586       for (y = 0; y < 2; y++) {
 587          for (x = 0; x < 2; x++) {
 588             /* The r coord is the cube face in [0,5] */
 589             offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
 590                                            offset[0][y][x], z_offset);
 591          }
 592       }
 593    }
 594
 595    /*
 596     * Transform 4 x i32 in
 597     *
 598     *   s_fpart = {s0, s1, s2, s3}
 599     *
 600     * into 8 x i16
 601     *
 602     *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
 603     *
 604     * into two 8 x i16
 605     *
 606     *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
 607     *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
 608     *
 609     * and likewise for t_fpart. There is no risk of loosing precision here
 610     * since the fractional parts only use the lower 8bits.
 611     */
 612    s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
 613    if (dims >= 2)
 614       t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
 615    if (dims >= 3)
 616       r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, "");
 617
 618    {
 619       LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
 620       LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
 621       LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
 622       LLVMValueRef shuffle_lo;
 623       LLVMValueRef shuffle_hi;
 624
 625       for (j = 0; j < h16.type.length; j += 4) {
 626 #ifdef PIPE_ARCH_LITTLE_ENDIAN
 627          unsigned subindex = 0;
 628 #else
 629          unsigned subindex = 1;
 630 #endif
 631          LLVMValueRef index;
 632
 633          index = LLVMConstInt(elem_type, j/2 + subindex, 0);
 634          for (i = 0; i < 4; ++i)
 635             shuffles_lo[j + i] = index;
 636
 637          index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
 638          for (i = 0; i < 4; ++i)
 639             shuffles_hi[j + i] = index;
 640       }
 641
 642       shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
 643       shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
 644
 645       s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
 646                                           shuffle_lo, "");
 647       s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
 648                                           shuffle_hi, "");
 649       if (dims >= 2) {
 650          t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
 651                                              shuffle_lo, "");
 652          t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
 653                                              shuffle_hi, "");
 654       }
 655       if (dims >= 3) {
 656          r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
 657                                              shuffle_lo, "");
 658          r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
 659                                              shuffle_hi, "");
 660       }
 661    }
 662
 663    /*
 664     * Fetch the pixels as 4 x 32bit (rgba order might differ):
 665     *
 666     *   rgba0 rgba1 rgba2 rgba3
 667     *
 668     * bit cast them into 16 x u8
 669     *
 670     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
 671     *
 672     * unpack them into two 8 x i16:
 673     *
 674     *   r0 g0 b0 a0 r1 g1 b1 a1
 675     *   r2 g2 b2 a2 r3 g3 b3 a3
 676     *
 677     * The higher 8 bits of the resulting elements will be zero.
 678     */
 679    numj = 1 + (dims >= 2);
 680    numk = 1 + (dims >= 3);
 681
 682    for (k = 0; k < numk; k++) {
 683       for (j = 0; j < numj; j++) {
 684          for (i = 0; i < 2; i++) {
 685             LLVMValueRef rgba8;
 686
 687             if (util_format_is_rgba8_variant(bld->format_desc)) {
 688                /*
 689                 * Given the format is a rgba8, just read the pixels as is,
 690                 * without any swizzling. Swizzling will be done later.
 691                 */
 692                rgba8 = lp_build_gather(bld->gallivm,
 693                                        bld->texel_type.length,
 694                                        bld->format_desc->block.bits,
 695                                        bld->texel_type.width,
 696                                        data_ptr, offset[k][j][i]);
 697
 698                rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
 699             }
 700             else {
 701                rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
 702                                                bld->format_desc,
 703                                                u8n.type,
 704                                                data_ptr, offset[k][j][i],
 705                                                x_subcoord[i],
 706                                                y_subcoord[j]);
 707             }
 708
 709             /* Expand one 4*rgba8 to two 2*rgba16 */
 710             lp_build_unpack2(bld->gallivm, u8n.type, h16.type,
 711                              rgba8,
 712                              &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]);
 713          }
 714       }
 715    }
 716
 717    /*
 718     * Linear interpolation with 8.8 fixed point.
 719     */
 720    if (dims == 1) {
 721       /* 1-D lerp */
 722       packed_lo = lp_build_lerp(&h16,
 723                                 s_fpart_lo,
 724                                 neighbors_lo[0][0][0],
 725                                 neighbors_lo[0][0][1]);
 726
 727       packed_hi = lp_build_lerp(&h16,
 728                                 s_fpart_hi,
 729                                 neighbors_hi[0][0][0],
 730                                 neighbors_hi[0][0][1]);
 731    }
 732    else {
 733       /* 2-D lerp */
 734       packed_lo = lp_build_lerp_2d(&h16,
 735                                    s_fpart_lo, t_fpart_lo,
 736                                    neighbors_lo[0][0][0],
 737                                    neighbors_lo[0][0][1],
 738                                    neighbors_lo[0][1][0],
 739                                    neighbors_lo[0][1][1]);
 740
 741       packed_hi = lp_build_lerp_2d(&h16,
 742                                    s_fpart_hi, t_fpart_hi,
 743                                    neighbors_hi[0][0][0],
 744                                    neighbors_hi[0][0][1],
 745                                    neighbors_hi[0][1][0],
 746                                    neighbors_hi[0][1][1]);
 747
 748       if (dims >= 3) {
 749          LLVMValueRef packed_lo2, packed_hi2;
 750
 751          /* lerp in the second z slice */
 752          packed_lo2 = lp_build_lerp_2d(&h16,
 753                                        s_fpart_lo, t_fpart_lo,
 754                                        neighbors_lo[1][0][0],
 755                                        neighbors_lo[1][0][1],
 756                                        neighbors_lo[1][1][0],
 757                                        neighbors_lo[1][1][1]);
 758
 759          packed_hi2 = lp_build_lerp_2d(&h16,
 760                                        s_fpart_hi, t_fpart_hi,
 761                                        neighbors_hi[1][0][0],
 762                                        neighbors_hi[1][0][1],
 763                                        neighbors_hi[1][1][0],
 764                                        neighbors_hi[1][1][1]);
 765          /* interp between two z slices */
 766          packed_lo = lp_build_lerp(&h16, r_fpart_lo,
 767                                    packed_lo, packed_lo2);
 768          packed_hi = lp_build_lerp(&h16, r_fpart_hi,
 769                                    packed_hi, packed_hi2);
 770       }
 771    }
 772
 773    *colors_lo = packed_lo;
 774    *colors_hi = packed_hi;
 775 }
 776
 777
 778 /**
 779  * Emit code that samples the texture with the given image filter and mip
 780  * filter.  ilevel0/ilevel1 select the mipmap levels; their sizes, strides
 781  * and data pointers are looked up here.  ilevel1 and lod_fpart are only
 782  * used when mip_filter is PIPE_TEX_MIPFILTER_LINEAR (may be NULL otherwise).
 783  */
 784 static void
 785 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
 786                        unsigned img_filter, /* PIPE_TEX_FILTER_NEAREST or _LINEAR */
 787                        unsigned mip_filter, /* PIPE_TEX_MIPFILTER_x */
 788                        LLVMValueRef s, /* s/t/r: coords passed to the image samplers */
 789                        LLVMValueRef t,
 790                        LLVMValueRef r,
 791                        LLVMValueRef ilevel0, /* first mipmap level */
 792                        LLVMValueRef ilevel1, /* second mipmap level */
 793                        LLVMValueRef lod_fpart, /* float lerp weight between the levels */
 794                        LLVMValueRef colors_lo_var, /* out: receives the 'lo' colors */
 795                        LLVMValueRef colors_hi_var) /* out: receives the 'hi' colors */
 796 {
 797    LLVMBuilderRef builder = bld->gallivm->builder;
 798    LLVMValueRef size0;
 799    LLVMValueRef size1;
 800    LLVMValueRef row_stride0_vec;
 801    LLVMValueRef row_stride1_vec;
 802    LLVMValueRef img_stride0_vec;
 803    LLVMValueRef img_stride1_vec;
 804    LLVMValueRef data_ptr0;
 805    LLVMValueRef data_ptr1;
 806    LLVMValueRef colors0_lo, colors0_hi;
 807    LLVMValueRef colors1_lo, colors1_hi;
 808
 809    /* sample the first mipmap level */
 810    lp_build_mipmap_level_sizes(bld, ilevel0,
 811                                &size0,
 812                                &row_stride0_vec, &img_stride0_vec);
 813    data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
 814    if (img_filter == PIPE_TEX_FILTER_NEAREST) {
 815       lp_build_sample_image_nearest(bld,
 816                                     size0,
 817                                     row_stride0_vec, img_stride0_vec,
 818                                     data_ptr0, s, t, r,
 819                                     &colors0_lo, &colors0_hi);
 820    }
 821    else {
 822       assert(img_filter == PIPE_TEX_FILTER_LINEAR);
 823       lp_build_sample_image_linear(bld,
 824                                    size0,
 825                                    row_stride0_vec, img_stride0_vec,
 826                                    data_ptr0, s, t, r,
 827                                    &colors0_lo, &colors0_hi);
 828    }
 829
 830    /* Store the first level's colors in the output variables */
 831    LLVMBuildStore(builder, colors0_lo, colors_lo_var);
 832    LLVMBuildStore(builder, colors0_hi, colors_hi_var);
 833
 834    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
 835       LLVMValueRef h16_scale = lp_build_const_float(bld->gallivm, 256.0);
 836       LLVMTypeRef i32_type = LLVMIntTypeInContext(bld->gallivm->context, 32);
 837       struct lp_build_if_state if_ctx;
 838       LLVMValueRef need_lerp;
 839       /* convert the float weight to a 32-bit integer scaled by 256 */
 840       lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, "");
 841       lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "lod_fpart.fixed16");
 842
 843       /* need_lerp = lod_fpart > 0; if not, the colors stored above are final */
 844       need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
 845                                 lod_fpart, LLVMConstNull(i32_type),
 846                                 "need_lerp");
 847
 848       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
 849       {
 850          struct lp_build_context h16_bld;
 851
 852          lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16));
 853
 854          /* sample the second mipmap level */
 855          lp_build_mipmap_level_sizes(bld, ilevel1,
 856                                      &size1,
 857                                      &row_stride1_vec, &img_stride1_vec);
 858          data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
 859          if (img_filter == PIPE_TEX_FILTER_NEAREST) {
 860             lp_build_sample_image_nearest(bld,
 861                                           size1,
 862                                           row_stride1_vec, img_stride1_vec,
 863                                           data_ptr1, s, t, r,
 864                                           &colors1_lo, &colors1_hi);
 865          }
 866          else {
 867             lp_build_sample_image_linear(bld,
 868                                          size1,
 869                                          row_stride1_vec, img_stride1_vec,
 870                                          data_ptr1, s, t, r,
 871                                          &colors1_lo, &colors1_hi);
 872          }
 873
 874          /* interpolate samples from the two mipmap levels */
 875          /* narrow the weight to the h16 element type, then broadcast it */
 876          lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, "");
 877          lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart);
 878
 879 #if HAVE_LLVM == 0x208
 880          /* This is a work-around for a bug in LLVM 2.8.
 881           * Evidently, something goes wrong in the construction of the
 882           * lod_fpart short[8] vector.  Adding this no-effect shuffle seems
 883           * to force the vector to be properly constructed.
 884           * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
 885           */
 886          {
 887             LLVMValueRef shuffles[8], shuffle;
 888             int i;
 889             assert(h16_bld.type.length <= Elements(shuffles));
 890             for (i = 0; i < h16_bld.type.length; i++)
 891                shuffles[i] = lp_build_const_int32(bld->gallivm, 2 * (i & 1)); /* mask: 0,2,0,2,... */
 892             shuffle = LLVMConstVector(shuffles, h16_bld.type.length);
 893             lod_fpart = LLVMBuildShuffleVector(builder,
 894                                                lod_fpart, lod_fpart,
 895                                                shuffle, "");
 896          }
 897 #endif
 898
 899          colors0_lo = lp_build_lerp(&h16_bld, lod_fpart,
 900                                     colors0_lo, colors1_lo);
 901          colors0_hi = lp_build_lerp(&h16_bld, lod_fpart,
 902                                     colors0_hi, colors1_hi);
 903          /* overwrite the first-level colors stored earlier with the blend */
 904          LLVMBuildStore(builder, colors0_lo, colors_lo_var);
 905          LLVMBuildStore(builder, colors0_hi, colors_hi_var);
 906       }
 907       lp_build_endif(&if_ctx);
 908    }
 909 }
 910
 911
 912
 913 /**
 914  * Texture sampling in AoS format.  Used when sampling common 32-bit/texel
 915  * formats.  1D/2D/3D/cube textures and all mipmap sampling modes are
 916  * supported, but only the simple wrap modes.  Results go to texel_out[].
 917  */
 918 void
 919 lp_build_sample_aos(struct lp_build_sample_context *bld,
 920                     unsigned unit, /* forwarded to the lod / mip-level helpers */
 921                     LLVMValueRef s, /* s/t/r: texcoords; replaced for cube maps below */
 922                     LLVMValueRef t,
 923                     LLVMValueRef r,
 924                     const LLVMValueRef *ddx, /* ddx/ddy: recomputed for cube maps below */
 925                     const LLVMValueRef *ddy,
 926                     LLVMValueRef lod_bias, /* optional */
 927                     LLVMValueRef explicit_lod, /* optional */
 928                     LLVMValueRef texel_out[4]) /* out: the four channels, SoA */
 929 {
 930    struct lp_build_context *int_bld = &bld->int_bld;
 931    LLVMBuilderRef builder = bld->gallivm->builder;
 932    const unsigned mip_filter = bld->static_state->min_mip_filter;
 933    const unsigned min_filter = bld->static_state->min_img_filter;
 934    const unsigned mag_filter = bld->static_state->mag_img_filter;
 935    const unsigned dims = bld->dims;
 936    LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
 937    LLVMValueRef ilevel0, ilevel1 = NULL;
 938    LLVMValueRef packed, packed_lo, packed_hi;
 939    LLVMValueRef unswizzled[4];
 940    LLVMValueRef face_ddx[4], face_ddy[4];
 941    struct lp_build_context h16_bld;
 942    LLVMValueRef i32t_zero = lp_build_const_int32(bld->gallivm, 0);
 943
 944    /* we only support the common/simple wrap modes at this time */
 945    assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
 946    if (dims >= 2)
 947       assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t));
 948    if (dims >= 3)
 949       assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r));
 950
 951
 952    /* make 16-bit fixed-pt builder context */
 953    lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16));
 954
 955    /* cube face selection, compute per-face coords, etc. */
 956    if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
 957       LLVMValueRef face, face_s, face_t;
 958       lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
 959       s = face_s; /* vec */
 960       t = face_t; /* vec */
 961       /* use 'r' to indicate cube face */
 962       r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
 963
 964       /* recompute ddx, ddy using the new (s,t) face texcoords */
 965       face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s);
 966       face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t);
 967       face_ddx[2] = NULL;
 968       face_ddx[3] = NULL;
 969       face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s);
 970       face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t);
 971       face_ddy[2] = NULL;
 972       face_ddy[3] = NULL;
 973       ddx = face_ddx;
 974       ddy = face_ddy;
 975    }
 976
 977    /*
 978     * Compute the level of detail as lod_ipart (integer) and lod_fpart.
 979     */
 980    if (min_filter != mag_filter ||
 981        mip_filter != PIPE_TEX_MIPFILTER_NONE) {
 982       /* Need to compute lod either to choose mipmap levels or to
 983        * distinguish between minification/magnification with one mipmap level.
 984        */
 985       lp_build_lod_selector(bld, unit, ddx, ddy,
 986                             lod_bias, explicit_lod,
 987                             mip_filter,
 988                             &lod_ipart, &lod_fpart);
 989    } else {
 990       lod_ipart = i32t_zero; /* lod not needed; lod_fpart stays NULL */
 991    }
 992
 993    /*
 994     * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
 995     */
 996    switch (mip_filter) {
 997    default:
 998       assert(0 && "bad mip_filter value in lp_build_sample_aos()");
 999       /* fall-through */
1000    case PIPE_TEX_MIPFILTER_NONE:
1001       /* always use mip level 0 */
1002       if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1003          /* XXX this is a work-around for an apparent bug in LLVM 2.7.
1004           * We should be able to set ilevel0 = const(0) but that causes
1005           * bad x86 code to be emitted.
1006           */
1007          assert(lod_ipart);
1008          lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
1009       }
1010       else {
1011          ilevel0 = i32t_zero;
1012       }
1013       break;
1014    case PIPE_TEX_MIPFILTER_NEAREST:
1015       assert(lod_ipart);
1016       lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
1017       break;
1018    case PIPE_TEX_MIPFILTER_LINEAR:
1019       assert(lod_ipart);
1020       assert(lod_fpart);
1021       lp_build_linear_mip_levels(bld, unit,
1022                                  lod_ipart, &lod_fpart,
1023                                  &ilevel0, &ilevel1);
1024       break;
1025    }
1026
1027    /*
1028     * Get/interpolate texture colors.
1029     */
1030    /* result variables that lp_build_sample_mipmap() stores into */
1031    packed_lo = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_lo");
1032    packed_hi = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_hi");
1033
1034    if (min_filter == mag_filter) {
1035       /* no need to distinguish between minification and magnification */
1036       lp_build_sample_mipmap(bld,
1037                              min_filter, mip_filter,
1038                              s, t, r,
1039                              ilevel0, ilevel1, lod_fpart,
1040                              packed_lo, packed_hi);
1041    }
1042    else {
1043       /* Emit conditional to choose min image filter or mag image filter
1044        * depending on lod_ipart being >= 0 or < 0, respectively.
1045        */
1046       struct lp_build_if_state if_ctx;
1047       LLVMValueRef minify;
1048
1049       /* minify = lod_ipart >= 0 (signed integer compare) */
1050       minify = LLVMBuildICmp(builder, LLVMIntSGE,
1051                              lod_ipart, int_bld->zero, "");
1052
1053       lp_build_if(&if_ctx, bld->gallivm, minify);
1054       {
1055          /* Use the minification filter */
1056          lp_build_sample_mipmap(bld,
1057                                 min_filter, mip_filter,
1058                                 s, t, r,
1059                                 ilevel0, ilevel1, lod_fpart,
1060                                 packed_lo, packed_hi);
1061       }
1062       lp_build_else(&if_ctx);
1063       {
1064          /* Use the magnification filter on mip level 0, with no mip filtering */
1065          lp_build_sample_mipmap(bld,
1066                                 mag_filter, PIPE_TEX_MIPFILTER_NONE,
1067                                 s, t, r,
1068                                 i32t_zero, NULL, NULL,
1069                                 packed_lo, packed_hi);
1070       }
1071       lp_build_endif(&if_ctx);
1072    }
1073
1074    /*
1075     * combine the values stored in 'packed_lo' and 'packed_hi' variables
1076     * into 'packed'
1077     */
1078    packed = lp_build_pack2(bld->gallivm,
1079                            h16_bld.type, lp_type_unorm(8),
1080                            LLVMBuildLoad(builder, packed_lo, ""),
1081                            LLVMBuildLoad(builder, packed_hi, ""));
1082
1083    /*
1084     * Convert to SoA and swizzle.
1085     */
1086    lp_build_rgba8_to_f32_soa(bld->gallivm,
1087                              bld->texel_type,
1088                              packed, unswizzled);
1089    /* apply the format's swizzle for rgba8 variants, else pass channels through */
1090    if (util_format_is_rgba8_variant(bld->format_desc)) {
1091       lp_build_format_swizzle_soa(bld->format_desc,
1092                                   &bld->texel_bld,
1093                                   unswizzled, texel_out);
1094    }
1095    else {
1096       texel_out[0] = unswizzled[0];
1097       texel_out[1] = unswizzled[1];
1098       texel_out[2] = unswizzled[2];
1099       texel_out[3] = unswizzled[3];
1100    }
1101 }