gallivm: Do not do mipfiltering when magnifying.
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
 30  * Texture sampling -- AoS.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
34 */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "util/u_debug.h"
39 #include "util/u_dump.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/u_format.h"
43 #include "lp_bld_debug.h"
44 #include "lp_bld_type.h"
45 #include "lp_bld_const.h"
46 #include "lp_bld_conv.h"
47 #include "lp_bld_arit.h"
48 #include "lp_bld_logic.h"
49 #include "lp_bld_swizzle.h"
50 #include "lp_bld_pack.h"
51 #include "lp_bld_flow.h"
52 #include "lp_bld_gather.h"
53 #include "lp_bld_format.h"
54 #include "lp_bld_sample.h"
55 #include "lp_bld_sample_aos.h"
56 #include "lp_bld_quad.h"
57
58
59 /**
60 * Build LLVM code for texture coord wrapping, for nearest filtering,
61 * for scaled integer texcoords.
62 * \param block_length is the length of the pixel block along the
63 * coordinate axis
64 * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size
65 * \param length the texture size along one dimension
66 * \param stride pixel stride along the coordinate axis (in bytes)
67 * \param is_pot if TRUE, length is a power of two
68 * \param wrap_mode one of PIPE_TEX_WRAP_x
69 * \param out_offset byte offset for the wrapped coordinate
70 * \param out_i resulting sub-block pixel coordinate for coord0
71 */
static void
lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
                                 unsigned block_length,
                                 LLVMValueRef coord,
                                 LLVMValueRef length,
                                 LLVMValueRef stride,
                                 boolean is_pot,
                                 unsigned wrap_mode,
                                 LLVMValueRef *out_offset,
                                 LLVMValueRef *out_i)
{
   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMValueRef length_minus_one;

   length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if(is_pot)
         /* POT size: repeat-wrap is just a bitwise AND with (length - 1) */
         coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
      else {
         /* Add a bias to the texcoord to handle negative coords */
         /* NOTE(review): the 1024*length bias only covers coords down to
          * -1024*length; presumably that range is sufficient here. */
         LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024);
         coord = LLVMBuildAdd(bld->builder, coord, bias, "");
         coord = LLVMBuildURem(bld->builder, coord, length, "");
      }
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      /* clamp coord to [0, length - 1] */
      coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
      coord = lp_build_min(int_coord_bld, coord, length_minus_one);
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      /* only the simple wrap modes are supported by the AoS sampling path */
      assert(0);
   }

   /* split the wrapped coord into a byte offset plus the sub-block pixel
    * coordinate along this axis
    */
   lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
                                  out_offset, out_i);
}
119
120
121 /**
122 * Build LLVM code for texture coord wrapping, for linear filtering,
123 * for scaled integer texcoords.
124 * \param block_length is the length of the pixel block along the
125 * coordinate axis
126 * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size
127 * \param length the texture size along one dimension
128 * \param stride pixel stride along the coordinate axis (in bytes)
129 * \param is_pot if TRUE, length is a power of two
130 * \param wrap_mode one of PIPE_TEX_WRAP_x
131 * \param offset0 resulting relative offset for coord0
132 * \param offset1 resulting relative offset for coord0 + 1
133 * \param i0 resulting sub-block pixel coordinate for coord0
134 * \param i1 resulting sub-block pixel coordinate for coord0 + 1
135 */
static void
lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
                                unsigned block_length,
                                LLVMValueRef coord0,
                                LLVMValueRef length,
                                LLVMValueRef stride,
                                boolean is_pot,
                                unsigned wrap_mode,
                                LLVMValueRef *offset0,
                                LLVMValueRef *offset1,
                                LLVMValueRef *i0,
                                LLVMValueRef *i1)
{
   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMValueRef length_minus_one;
   LLVMValueRef lmask, umask, mask;

   if (block_length != 1) {
      /*
       * If the pixel block covers more than one pixel then there is no easy
       * way to calculate offset1 relative to offset0. Instead, compute them
       * independently.
       */

      LLVMValueRef coord1;

      /* wrap coord0 on its own... */
      lp_build_sample_wrap_nearest_int(bld,
                                       block_length,
                                       coord0,
                                       length,
                                       stride,
                                       is_pot,
                                       wrap_mode,
                                       offset0, i0);

      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);

      /* ...and coord0 + 1 on its own */
      lp_build_sample_wrap_nearest_int(bld,
                                       block_length,
                                       coord1,
                                       length,
                                       stride,
                                       is_pot,
                                       wrap_mode,
                                       offset1, i1);

      return;
   }

   /*
    * Scalar pixels -- try to compute offset0 and offset1 with a single stride
    * multiplication.
    */

   /* block_length == 1, so the sub-block coords are always zero */
   *i0 = uint_coord_bld->zero;
   *i1 = uint_coord_bld->zero;

   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
      }
      else {
         /* Add a bias to the texcoord to handle negative coords */
         LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024);
         coord0 = LLVMBuildAdd(bld->builder, coord0, bias, "");
         coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
      }

      /* mask is all-ones where coord0 != length-1, all-zeros at the last
       * texel; ANDing offset1 with it wraps the second sample back to
       * offset 0 (the first texel of the row).
       */
      mask = lp_build_compare(bld->builder, int_coord_bld->type,
                              PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);

      *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
      *offset1 = LLVMBuildAnd(bld->builder,
                              lp_build_add(uint_coord_bld, *offset0, stride),
                              mask, "");
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      /* lmask: coord0 >= 0; umask: coord0 < length-1 (i.e. coord0+1 is
       * still in range)
       */
      lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
                               PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
      umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
                               PIPE_FUNC_LESS, coord0, length_minus_one);

      /* clamp coord0 to [0, length-1] via selects */
      coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
      coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);

      mask = LLVMBuildAnd(bld->builder, lmask, umask, "");

      /* offset1 = offset0 + stride only when both coord0 and coord0+1 are
       * in range; otherwise offset1 == offset0 (edge clamp).
       */
      *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
      *offset1 = lp_build_add(uint_coord_bld,
                              *offset0,
                              LLVMBuildAnd(bld->builder, stride, mask, ""));
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      /* unsupported wrap mode in the AoS path */
      assert(0);
      *offset0 = uint_coord_bld->zero;
      *offset1 = uint_coord_bld->zero;
      break;
   }
}
247
248
249 /**
250 * Sample a single texture image with nearest sampling.
251 * If sampling a cube texture, r = cube face in [0,5].
252 * Return filtered color as two vectors of 16-bit fixed point values.
253 */
254 static void
255 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
256 LLVMValueRef width_vec,
257 LLVMValueRef height_vec,
258 LLVMValueRef depth_vec,
259 LLVMValueRef row_stride_vec,
260 LLVMValueRef img_stride_vec,
261 LLVMValueRef data_ptr,
262 LLVMValueRef s,
263 LLVMValueRef t,
264 LLVMValueRef r,
265 LLVMValueRef *colors_lo,
266 LLVMValueRef *colors_hi)
267 {
268 const int dims = texture_dims(bld->static_state->target);
269 LLVMBuilderRef builder = bld->builder;
270 struct lp_build_context i32, h16, u8n;
271 LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
272 LLVMValueRef i32_c8;
273 LLVMValueRef s_ipart, t_ipart, r_ipart;
274 LLVMValueRef x_stride;
275 LLVMValueRef x_offset, offset;
276 LLVMValueRef x_subcoord, y_subcoord, z_subcoord;
277
278 lp_build_context_init(&i32, builder, lp_type_int_vec(32));
279 lp_build_context_init(&h16, builder, lp_type_ufixed(16));
280 lp_build_context_init(&u8n, builder, lp_type_unorm(8));
281
282 i32_vec_type = lp_build_vec_type(i32.type);
283 h16_vec_type = lp_build_vec_type(h16.type);
284 u8n_vec_type = lp_build_vec_type(u8n.type);
285
286 if (bld->static_state->normalized_coords) {
287 /* s = s * width, t = t * height */
288 LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
289 LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
290 coord_vec_type, "");
291 s = lp_build_mul(&bld->coord_bld, s, fp_width);
292 if (dims >= 2) {
293 LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
294 coord_vec_type, "");
295 t = lp_build_mul(&bld->coord_bld, t, fp_height);
296 if (dims >= 3) {
297 LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
298 coord_vec_type, "");
299 r = lp_build_mul(&bld->coord_bld, r, fp_depth);
300 }
301 }
302 }
303
304 /* scale coords by 256 (8 fractional bits) */
305 s = lp_build_mul_imm(&bld->coord_bld, s, 256);
306 if (dims >= 2)
307 t = lp_build_mul_imm(&bld->coord_bld, t, 256);
308 if (dims >= 3)
309 r = lp_build_mul_imm(&bld->coord_bld, r, 256);
310
311 /* convert float to int */
312 s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
313 if (dims >= 2)
314 t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
315 if (dims >= 3)
316 r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
317
318 /* compute floor (shift right 8) */
319 i32_c8 = lp_build_const_int_vec(i32.type, 8);
320 s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
321 if (dims >= 2)
322 t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
323 if (dims >= 3)
324 r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
325
326 /* get pixel, row, image strides */
327 x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
328 bld->format_desc->block.bits/8);
329
330 /* Do texcoord wrapping, compute texel offset */
331 lp_build_sample_wrap_nearest_int(bld,
332 bld->format_desc->block.width,
333 s_ipart, width_vec, x_stride,
334 bld->static_state->pot_width,
335 bld->static_state->wrap_s,
336 &x_offset, &x_subcoord);
337 offset = x_offset;
338 if (dims >= 2) {
339 LLVMValueRef y_offset;
340 lp_build_sample_wrap_nearest_int(bld,
341 bld->format_desc->block.height,
342 t_ipart, height_vec, row_stride_vec,
343 bld->static_state->pot_height,
344 bld->static_state->wrap_t,
345 &y_offset, &y_subcoord);
346 offset = lp_build_add(&bld->uint_coord_bld, offset, y_offset);
347 if (dims >= 3) {
348 LLVMValueRef z_offset;
349 lp_build_sample_wrap_nearest_int(bld,
350 1, /* block length (depth) */
351 r_ipart, depth_vec, img_stride_vec,
352 bld->static_state->pot_height,
353 bld->static_state->wrap_r,
354 &z_offset, &z_subcoord);
355 offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
356 }
357 else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
358 LLVMValueRef z_offset;
359 /* The r coord is the cube face in [0,5] */
360 z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
361 offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
362 }
363 }
364
365 /*
366 * Fetch the pixels as 4 x 32bit (rgba order might differ):
367 *
368 * rgba0 rgba1 rgba2 rgba3
369 *
370 * bit cast them into 16 x u8
371 *
372 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
373 *
374 * unpack them into two 8 x i16:
375 *
376 * r0 g0 b0 a0 r1 g1 b1 a1
377 * r2 g2 b2 a2 r3 g3 b3 a3
378 *
379 * The higher 8 bits of the resulting elements will be zero.
380 */
381 {
382 LLVMValueRef rgba8;
383
384 if (util_format_is_rgba8_variant(bld->format_desc)) {
385 /*
386 * Given the format is a rgba8, just read the pixels as is,
387 * without any swizzling. Swizzling will be done later.
388 */
389 rgba8 = lp_build_gather(bld->builder,
390 bld->texel_type.length,
391 bld->format_desc->block.bits,
392 bld->texel_type.width,
393 data_ptr, offset);
394
395 rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
396 }
397 else {
398 rgba8 = lp_build_fetch_rgba_aos(bld->builder,
399 bld->format_desc,
400 u8n.type,
401 data_ptr, offset,
402 x_subcoord,
403 y_subcoord);
404 }
405
406 /* Expand one 4*rgba8 to two 2*rgba16 */
407 lp_build_unpack2(builder, u8n.type, h16.type,
408 rgba8,
409 colors_lo, colors_hi);
410 }
411 }
412
413
414 /**
415 * Sample a single texture image with (bi-)(tri-)linear sampling.
416 * Return filtered color as two vectors of 16-bit fixed point values.
417 */
418 static void
419 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
420 LLVMValueRef width_vec,
421 LLVMValueRef height_vec,
422 LLVMValueRef depth_vec,
423 LLVMValueRef row_stride_vec,
424 LLVMValueRef img_stride_vec,
425 LLVMValueRef data_ptr,
426 LLVMValueRef s,
427 LLVMValueRef t,
428 LLVMValueRef r,
429 LLVMValueRef *colors_lo,
430 LLVMValueRef *colors_hi)
431 {
432 const int dims = texture_dims(bld->static_state->target);
433 LLVMBuilderRef builder = bld->builder;
434 struct lp_build_context i32, h16, u8n;
435 LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
436 LLVMValueRef i32_c8, i32_c128, i32_c255;
437 LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
438 LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
439 LLVMValueRef r_ipart, r_fpart, r_fpart_lo, r_fpart_hi;
440 LLVMValueRef x_stride, y_stride, z_stride;
441 LLVMValueRef x_offset0, x_offset1;
442 LLVMValueRef y_offset0, y_offset1;
443 LLVMValueRef z_offset0, z_offset1;
444 LLVMValueRef offset[2][2][2]; /* [z][y][x] */
445 LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
446 LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */
447 LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */
448 LLVMValueRef packed_lo, packed_hi;
449 unsigned x, y, z;
450 unsigned i, j, k;
451 unsigned numj, numk;
452
453 lp_build_context_init(&i32, builder, lp_type_int_vec(32));
454 lp_build_context_init(&h16, builder, lp_type_ufixed(16));
455 lp_build_context_init(&u8n, builder, lp_type_unorm(8));
456
457 i32_vec_type = lp_build_vec_type(i32.type);
458 h16_vec_type = lp_build_vec_type(h16.type);
459 u8n_vec_type = lp_build_vec_type(u8n.type);
460
461 if (bld->static_state->normalized_coords) {
462 /* s = s * width, t = t * height */
463 LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
464 LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
465 coord_vec_type, "");
466 s = lp_build_mul(&bld->coord_bld, s, fp_width);
467 if (dims >= 2) {
468 LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
469 coord_vec_type, "");
470 t = lp_build_mul(&bld->coord_bld, t, fp_height);
471 }
472 if (dims >= 3) {
473 LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
474 coord_vec_type, "");
475 r = lp_build_mul(&bld->coord_bld, r, fp_depth);
476 }
477 }
478
479 /* scale coords by 256 (8 fractional bits) */
480 s = lp_build_mul_imm(&bld->coord_bld, s, 256);
481 if (dims >= 2)
482 t = lp_build_mul_imm(&bld->coord_bld, t, 256);
483 if (dims >= 3)
484 r = lp_build_mul_imm(&bld->coord_bld, r, 256);
485
486 /* convert float to int */
487 s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
488 if (dims >= 2)
489 t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
490 if (dims >= 3)
491 r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
492
493 /* subtract 0.5 (add -128) */
494 i32_c128 = lp_build_const_int_vec(i32.type, -128);
495 s = LLVMBuildAdd(builder, s, i32_c128, "");
496 if (dims >= 2) {
497 t = LLVMBuildAdd(builder, t, i32_c128, "");
498 }
499 if (dims >= 3) {
500 r = LLVMBuildAdd(builder, r, i32_c128, "");
501 }
502
503 /* compute floor (shift right 8) */
504 i32_c8 = lp_build_const_int_vec(i32.type, 8);
505 s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
506 if (dims >= 2)
507 t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
508 if (dims >= 3)
509 r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
510
511 /* compute fractional part (AND with 0xff) */
512 i32_c255 = lp_build_const_int_vec(i32.type, 255);
513 s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
514 if (dims >= 2)
515 t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
516 if (dims >= 3)
517 r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
518
519 /* get pixel, row and image strides */
520 x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
521 bld->format_desc->block.bits/8);
522 y_stride = row_stride_vec;
523 z_stride = img_stride_vec;
524
525 /* do texcoord wrapping and compute texel offsets */
526 lp_build_sample_wrap_linear_int(bld,
527 bld->format_desc->block.width,
528 s_ipart, width_vec, x_stride,
529 bld->static_state->pot_width,
530 bld->static_state->wrap_s,
531 &x_offset0, &x_offset1,
532 &x_subcoord[0], &x_subcoord[1]);
533 for (z = 0; z < 2; z++) {
534 for (y = 0; y < 2; y++) {
535 offset[z][y][0] = x_offset0;
536 offset[z][y][1] = x_offset1;
537 }
538 }
539
540 if (dims >= 2) {
541 lp_build_sample_wrap_linear_int(bld,
542 bld->format_desc->block.height,
543 t_ipart, height_vec, y_stride,
544 bld->static_state->pot_height,
545 bld->static_state->wrap_t,
546 &y_offset0, &y_offset1,
547 &y_subcoord[0], &y_subcoord[1]);
548
549 for (z = 0; z < 2; z++) {
550 for (x = 0; x < 2; x++) {
551 offset[z][0][x] = lp_build_add(&bld->uint_coord_bld,
552 offset[z][0][x], y_offset0);
553 offset[z][1][x] = lp_build_add(&bld->uint_coord_bld,
554 offset[z][1][x], y_offset1);
555 }
556 }
557 }
558
559 if (dims >= 3) {
560 lp_build_sample_wrap_linear_int(bld,
561 bld->format_desc->block.height,
562 r_ipart, depth_vec, z_stride,
563 bld->static_state->pot_depth,
564 bld->static_state->wrap_r,
565 &z_offset0, &z_offset1,
566 &z_subcoord[0], &z_subcoord[1]);
567 for (y = 0; y < 2; y++) {
568 for (x = 0; x < 2; x++) {
569 offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
570 offset[0][y][x], z_offset0);
571 offset[1][y][x] = lp_build_add(&bld->uint_coord_bld,
572 offset[1][y][x], z_offset1);
573 }
574 }
575 }
576 else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
577 LLVMValueRef z_offset;
578 z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
579 for (y = 0; y < 2; y++) {
580 for (x = 0; x < 2; x++) {
581 /* The r coord is the cube face in [0,5] */
582 offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
583 offset[0][y][x], z_offset);
584 }
585 }
586 }
587
588 /*
589 * Transform 4 x i32 in
590 *
591 * s_fpart = {s0, s1, s2, s3}
592 *
593 * into 8 x i16
594 *
595 * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
596 *
597 * into two 8 x i16
598 *
599 * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
600 * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
601 *
602 * and likewise for t_fpart. There is no risk of loosing precision here
603 * since the fractional parts only use the lower 8bits.
604 */
605 s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
606 if (dims >= 2)
607 t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
608 if (dims >= 3)
609 r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, "");
610
611 {
612 LLVMTypeRef elem_type = LLVMInt32Type();
613 LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
614 LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
615 LLVMValueRef shuffle_lo;
616 LLVMValueRef shuffle_hi;
617
618 for (j = 0; j < h16.type.length; j += 4) {
619 #ifdef PIPE_ARCH_LITTLE_ENDIAN
620 unsigned subindex = 0;
621 #else
622 unsigned subindex = 1;
623 #endif
624 LLVMValueRef index;
625
626 index = LLVMConstInt(elem_type, j/2 + subindex, 0);
627 for (i = 0; i < 4; ++i)
628 shuffles_lo[j + i] = index;
629
630 index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
631 for (i = 0; i < 4; ++i)
632 shuffles_hi[j + i] = index;
633 }
634
635 shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
636 shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
637
638 s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
639 shuffle_lo, "");
640 s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
641 shuffle_hi, "");
642 if (dims >= 2) {
643 t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
644 shuffle_lo, "");
645 t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
646 shuffle_hi, "");
647 }
648 if (dims >= 3) {
649 r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
650 shuffle_lo, "");
651 r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
652 shuffle_hi, "");
653 }
654 }
655
656 /*
657 * Fetch the pixels as 4 x 32bit (rgba order might differ):
658 *
659 * rgba0 rgba1 rgba2 rgba3
660 *
661 * bit cast them into 16 x u8
662 *
663 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
664 *
665 * unpack them into two 8 x i16:
666 *
667 * r0 g0 b0 a0 r1 g1 b1 a1
668 * r2 g2 b2 a2 r3 g3 b3 a3
669 *
670 * The higher 8 bits of the resulting elements will be zero.
671 */
672 numj = 1 + (dims >= 2);
673 numk = 1 + (dims >= 3);
674
675 for (k = 0; k < numk; k++) {
676 for (j = 0; j < numj; j++) {
677 for (i = 0; i < 2; i++) {
678 LLVMValueRef rgba8;
679
680 if (util_format_is_rgba8_variant(bld->format_desc)) {
681 /*
682 * Given the format is a rgba8, just read the pixels as is,
683 * without any swizzling. Swizzling will be done later.
684 */
685 rgba8 = lp_build_gather(bld->builder,
686 bld->texel_type.length,
687 bld->format_desc->block.bits,
688 bld->texel_type.width,
689 data_ptr, offset[k][j][i]);
690
691 rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
692 }
693 else {
694 rgba8 = lp_build_fetch_rgba_aos(bld->builder,
695 bld->format_desc,
696 u8n.type,
697 data_ptr, offset[k][j][i],
698 x_subcoord[i],
699 y_subcoord[j]);
700 }
701
702 /* Expand one 4*rgba8 to two 2*rgba16 */
703 lp_build_unpack2(builder, u8n.type, h16.type,
704 rgba8,
705 &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]);
706 }
707 }
708 }
709
710 /*
711 * Linear interpolation with 8.8 fixed point.
712 */
713 if (dims == 1) {
714 /* 1-D lerp */
715 packed_lo = lp_build_lerp(&h16,
716 s_fpart_lo,
717 neighbors_lo[0][0][0],
718 neighbors_lo[0][0][1]);
719
720 packed_hi = lp_build_lerp(&h16,
721 s_fpart_hi,
722 neighbors_hi[0][0][0],
723 neighbors_hi[0][0][1]);
724 }
725 else {
726 /* 2-D lerp */
727 packed_lo = lp_build_lerp_2d(&h16,
728 s_fpart_lo, t_fpart_lo,
729 neighbors_lo[0][0][0],
730 neighbors_lo[0][0][1],
731 neighbors_lo[0][1][0],
732 neighbors_lo[0][1][1]);
733
734 packed_hi = lp_build_lerp_2d(&h16,
735 s_fpart_hi, t_fpart_hi,
736 neighbors_hi[0][0][0],
737 neighbors_hi[0][0][1],
738 neighbors_hi[0][1][0],
739 neighbors_hi[0][1][1]);
740
741 if (dims >= 3) {
742 LLVMValueRef packed_lo2, packed_hi2;
743
744 /* lerp in the second z slice */
745 packed_lo2 = lp_build_lerp_2d(&h16,
746 s_fpart_lo, t_fpart_lo,
747 neighbors_lo[1][0][0],
748 neighbors_lo[1][0][1],
749 neighbors_lo[1][1][0],
750 neighbors_lo[1][1][1]);
751
752 packed_hi2 = lp_build_lerp_2d(&h16,
753 s_fpart_hi, t_fpart_hi,
754 neighbors_hi[1][0][0],
755 neighbors_hi[1][0][1],
756 neighbors_hi[1][1][0],
757 neighbors_hi[1][1][1]);
758 /* interp between two z slices */
759 packed_lo = lp_build_lerp(&h16, r_fpart_lo,
760 packed_lo, packed_lo2);
761 packed_hi = lp_build_lerp(&h16, r_fpart_hi,
762 packed_hi, packed_hi2);
763 }
764 }
765
766 *colors_lo = packed_lo;
767 *colors_hi = packed_hi;
768 }
769
770
771 /**
772 * Sample the texture/mipmap using given image filter and mip filter.
773 * data0_ptr and data1_ptr point to the two mipmap levels to sample
774 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
775 * If we're using nearest miplevel sampling the '1' values will be null/unused.
776 */
777 static void
778 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
779 unsigned img_filter,
780 unsigned mip_filter,
781 LLVMValueRef s,
782 LLVMValueRef t,
783 LLVMValueRef r,
784 LLVMValueRef lod_fpart,
785 LLVMValueRef width0_vec,
786 LLVMValueRef width1_vec,
787 LLVMValueRef height0_vec,
788 LLVMValueRef height1_vec,
789 LLVMValueRef depth0_vec,
790 LLVMValueRef depth1_vec,
791 LLVMValueRef row_stride0_vec,
792 LLVMValueRef row_stride1_vec,
793 LLVMValueRef img_stride0_vec,
794 LLVMValueRef img_stride1_vec,
795 LLVMValueRef data_ptr0,
796 LLVMValueRef data_ptr1,
797 LLVMValueRef *colors_lo,
798 LLVMValueRef *colors_hi)
799 {
800 LLVMValueRef colors0_lo, colors0_hi;
801 LLVMValueRef colors1_lo, colors1_hi;
802
803 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
804 /* sample the first mipmap level */
805 lp_build_sample_image_nearest(bld,
806 width0_vec, height0_vec, depth0_vec,
807 row_stride0_vec, img_stride0_vec,
808 data_ptr0, s, t, r,
809 &colors0_lo, &colors0_hi);
810
811 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
812 /* sample the second mipmap level */
813 lp_build_sample_image_nearest(bld,
814 width1_vec, height1_vec, depth1_vec,
815 row_stride1_vec, img_stride1_vec,
816 data_ptr1, s, t, r,
817 &colors1_lo, &colors1_hi);
818 }
819 }
820 else {
821 assert(img_filter == PIPE_TEX_FILTER_LINEAR);
822
823 /* sample the first mipmap level */
824 lp_build_sample_image_linear(bld,
825 width0_vec, height0_vec, depth0_vec,
826 row_stride0_vec, img_stride0_vec,
827 data_ptr0, s, t, r,
828 &colors0_lo, &colors0_hi);
829
830 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
831 /* sample the second mipmap level */
832 lp_build_sample_image_linear(bld,
833 width1_vec, height1_vec, depth1_vec,
834 row_stride1_vec, img_stride1_vec,
835 data_ptr1, s, t, r,
836 &colors1_lo, &colors1_hi);
837 }
838 }
839
840 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
841 /* interpolate samples from the two mipmap levels */
842 struct lp_build_context h16;
843 lp_build_context_init(&h16, bld->builder, lp_type_ufixed(16));
844
845 *colors_lo = lp_build_lerp(&h16, lod_fpart,
846 colors0_lo, colors1_lo);
847 *colors_hi = lp_build_lerp(&h16, lod_fpart,
848 colors0_hi, colors1_hi);
849 }
850 else {
851 /* use first/only level's colors */
852 *colors_lo = colors0_lo;
853 *colors_hi = colors0_hi;
854 }
855 }
856
857
858
859 /**
860 * Texture sampling in AoS format. Used when sampling common 32-bit/texel
861 * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes
862 * but only limited texture coord wrap modes.
863 */
void
lp_build_sample_aos(struct lp_build_sample_context *bld,
                    unsigned unit,
                    LLVMValueRef s,
                    LLVMValueRef t,
                    LLVMValueRef r,
                    const LLVMValueRef *ddx,
                    const LLVMValueRef *ddy,
                    LLVMValueRef lod_bias, /* optional */
                    LLVMValueRef explicit_lod, /* optional */
                    LLVMValueRef width,
                    LLVMValueRef height,
                    LLVMValueRef depth,
                    LLVMValueRef width_vec,
                    LLVMValueRef height_vec,
                    LLVMValueRef depth_vec,
                    LLVMValueRef row_stride_array,
                    LLVMValueRef img_stride_array,
                    LLVMValueRef data_array,
                    LLVMValueRef texel_out[4])
{
   struct lp_build_context *int_bld = &bld->int_bld;
   LLVMBuilderRef builder = bld->builder;
   const unsigned mip_filter = bld->static_state->min_mip_filter;
   const unsigned min_filter = bld->static_state->min_img_filter;
   const unsigned mag_filter = bld->static_state->mag_img_filter;
   const int dims = texture_dims(bld->static_state->target);
   LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
   LLVMValueRef ilevel0, ilevel1 = NULL;
   LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
   LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
   LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
   LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
   LLVMValueRef data_ptr0, data_ptr1 = NULL;
   LLVMValueRef packed, packed_lo, packed_hi;
   LLVMValueRef unswizzled[4];
   LLVMValueRef face_ddx[4], face_ddy[4];
   struct lp_build_context h16;
   LLVMTypeRef h16_vec_type;

   /* we only support the common/simple wrap modes at this time */
   assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
   if (dims >= 2)
      assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t));
   if (dims >= 3)
      assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r));


   /* make 16-bit fixed-pt builder context */
   lp_build_context_init(&h16, builder, lp_type_ufixed(16));
   h16_vec_type = lp_build_vec_type(h16.type);


   /* cube face selection, compute pre-face coords, etc. */
   if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
      LLVMValueRef face, face_s, face_t;
      lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
      s = face_s; /* vec */
      t = face_t; /* vec */
      /* use 'r' to indicate cube face */
      r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */

      /* recompute ddx, ddy using the new (s,t) face texcoords */
      face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
      face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
      face_ddx[2] = NULL;
      face_ddx[3] = NULL;
      face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
      face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
      face_ddy[2] = NULL;
      face_ddy[3] = NULL;
      ddx = face_ddx;
      ddy = face_ddy;
   }

   /*
    * Compute the level of detail (float).
    */
   if (min_filter != mag_filter ||
       mip_filter != PIPE_TEX_MIPFILTER_NONE) {
      /* Need to compute lod either to choose mipmap levels or to
       * distinguish between minification/magnification with one mipmap level.
       */
      lp_build_lod_selector(bld, unit, ddx, ddy,
                            lod_bias, explicit_lod,
                            width, height, depth,
                            mip_filter,
                            &lod_ipart, &lod_fpart);
   } else {
      /* filters match and no mipmapping: lod is irrelevant, use level 0 */
      lod_ipart = LLVMConstInt(LLVMInt32Type(), 0, 0);
   }

   /*
    * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
    * If mipfilter=linear, also compute the weight between the two
    * mipmap levels: lod_fpart
    */
   switch (mip_filter) {
   default:
      assert(0 && "bad mip_filter value in lp_build_sample_aos()");
      /* fall-through */
   case PIPE_TEX_MIPFILTER_NONE:
      /* always use mip level 0 */
      if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
         /* XXX this is a work-around for an apparent bug in LLVM 2.7.
          * We should be able to set ilevel0 = const(0) but that causes
          * bad x86 code to be emitted.
          */
         assert(lod_ipart);
         lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
      }
      else {
         ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
      }
      break;
   case PIPE_TEX_MIPFILTER_NEAREST:
      assert(lod_ipart);
      lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
      break;
   case PIPE_TEX_MIPFILTER_LINEAR:
      {
         LLVMValueRef f256 = LLVMConstReal(LLVMFloatType(), 256.0);
         LLVMTypeRef i32_type = LLVMIntType(32);
         LLVMTypeRef i16_type = LLVMIntType(16);

         assert(lod_fpart);

         lp_build_linear_mip_levels(bld, unit, lod_ipart, &ilevel0, &ilevel1);

         /* convert the float lod weight to 8-bit fixed point and broadcast
          * it into a 16-bit lane vector for the h16 lerps below
          */
         lod_fpart = LLVMBuildFMul(builder, lod_fpart, f256, "");
         lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "");
         lod_fpart = LLVMBuildTrunc(builder, lod_fpart, i16_type, "");
         lod_fpart = lp_build_broadcast_scalar(&h16, lod_fpart);

         /* the lod_fpart values will be fixed pt values in [0,1) */
      }
      break;
   }

   /* compute image size(s) of source mipmap level(s) */
   lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec,
                               ilevel0, ilevel1,
                               row_stride_array, img_stride_array,
                               &width0_vec, &width1_vec,
                               &height0_vec, &height1_vec,
                               &depth0_vec, &depth1_vec,
                               &row_stride0_vec, &row_stride1_vec,
                               &img_stride0_vec, &img_stride1_vec);

   /*
    * Get pointer(s) to image data for mipmap level(s).
    */
   data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
   }


   /*
    * Get/interpolate texture colors.
    */
   if (min_filter == mag_filter) {
      /* no need to distinquish between minification and magnification */
      lp_build_sample_mipmap(bld, min_filter, mip_filter,
                             s, t, r, lod_fpart,
                             width0_vec, width1_vec,
                             height0_vec, height1_vec,
                             depth0_vec, depth1_vec,
                             row_stride0_vec, row_stride1_vec,
                             img_stride0_vec, img_stride1_vec,
                             data_ptr0, data_ptr1,
                             &packed_lo, &packed_hi);
   }
   else {
      /* Emit conditional to choose min image filter or mag image filter
       * depending on the lod being > 0 or <= 0, respectively.
       */
      struct lp_build_flow_context *flow_ctx;
      struct lp_build_if_state if_ctx;
      LLVMValueRef minify;

      flow_ctx = lp_build_flow_create(builder);
      lp_build_flow_scope_begin(flow_ctx);

      /* declare the phi-like outputs of the if/else before entering it */
      packed_lo = LLVMGetUndef(h16_vec_type);
      packed_hi = LLVMGetUndef(h16_vec_type);

      lp_build_flow_scope_declare(flow_ctx, &packed_lo);
      lp_build_flow_scope_declare(flow_ctx, &packed_hi);

      /* minify = lod >= 0.0 */
      minify = LLVMBuildICmp(builder, LLVMIntSGE,
                             lod_ipart, int_bld->zero, "");

      lp_build_if(&if_ctx, flow_ctx, builder, minify);
      {
         /* Use the minification filter */
         lp_build_sample_mipmap(bld, min_filter, mip_filter,
                                s, t, r, lod_fpart,
                                width0_vec, width1_vec,
                                height0_vec, height1_vec,
                                depth0_vec, depth1_vec,
                                row_stride0_vec, row_stride1_vec,
                                img_stride0_vec, img_stride1_vec,
                                data_ptr0, data_ptr1,
                                &packed_lo, &packed_hi);
      }
      lp_build_else(&if_ctx);
      {
         /* Use the magnification filter: no mip filtering, level 0 only
          * (hence the NULL second-level arguments)
          */
         lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE,
                                s, t, r, NULL,
                                width_vec, NULL,
                                height_vec, NULL,
                                depth_vec, NULL,
                                row_stride0_vec, NULL,
                                img_stride0_vec, NULL,
                                data_ptr0, NULL,
                                &packed_lo, &packed_hi);
      }
      lp_build_endif(&if_ctx);

      lp_build_flow_scope_end(flow_ctx);
      lp_build_flow_destroy(flow_ctx);
   }

   /* combine 'packed_lo', 'packed_hi' into 'packed' */
   {
      struct lp_build_context h16, u8n;

      lp_build_context_init(&h16, builder, lp_type_ufixed(16));
      lp_build_context_init(&u8n, builder, lp_type_unorm(8));

      packed = lp_build_pack2(builder, h16.type, u8n.type,
                              packed_lo, packed_hi);
   }

   /*
    * Convert to SoA and swizzle.
    */
   lp_build_rgba8_to_f32_soa(builder,
                             bld->texel_type,
                             packed, unswizzled);

   if (util_format_is_rgba8_variant(bld->format_desc)) {
      lp_build_format_swizzle_soa(bld->format_desc,
                                  &bld->texel_bld,
                                  unswizzled, texel_out);
   }
   else {
      texel_out[0] = unswizzled[0];
      texel_out[1] = unswizzled[1];
      texel_out[2] = unswizzled[2];
      texel_out[3] = unswizzled[3];
   }

   apply_sampler_swizzle(bld, texel_out);
}