gallivm: Remove dead experimental code.
[mesa.git] src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Texture sampling -- AoS.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
34 */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "util/u_debug.h"
39 #include "util/u_dump.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/u_format.h"
43 #include "lp_bld_debug.h"
44 #include "lp_bld_type.h"
45 #include "lp_bld_const.h"
46 #include "lp_bld_conv.h"
47 #include "lp_bld_arit.h"
48 #include "lp_bld_logic.h"
49 #include "lp_bld_swizzle.h"
50 #include "lp_bld_pack.h"
51 #include "lp_bld_flow.h"
52 #include "lp_bld_gather.h"
53 #include "lp_bld_format.h"
54 #include "lp_bld_sample.h"
55 #include "lp_bld_sample_aos.h"
56 #include "lp_bld_quad.h"
57
58
59 /**
60 * Build LLVM code for texture coord wrapping, for nearest filtering,
61 * for scaled integer texcoords.
62 * \param block_length is the length of the pixel block along the
63 * coordinate axis
64 * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size
65 * \param length the texture size along one dimension
66 * \param stride pixel stride along the coordinate axis (in bytes)
67 * \param is_pot if TRUE, length is a power of two
68 * \param wrap_mode one of PIPE_TEX_WRAP_x
69 * \param out_offset byte offset for the wrapped coordinate
70 * \param out_i resulting sub-block pixel coordinate for coord0
71 */
72 static void
73 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
74 unsigned block_length,
75 LLVMValueRef coord,
76 LLVMValueRef length,
77 LLVMValueRef stride,
78 boolean is_pot,
79 unsigned wrap_mode,
80 LLVMValueRef *out_offset,
81 LLVMValueRef *out_i)
82 {
83 struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
84 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
85 LLVMValueRef length_minus_one;
86
87 length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
88
89 switch(wrap_mode) {
90 case PIPE_TEX_WRAP_REPEAT:
91 if(is_pot)
92 coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
93 else
94 /* Signed remainder won't give the right results for negative
95 * dividends, but unsigned remainder does. */
96 coord = LLVMBuildURem(bld->builder, coord, length, "");
97 break;
98
99 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
100 coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
101 coord = lp_build_min(int_coord_bld, coord, length_minus_one);
102 break;
103
104 case PIPE_TEX_WRAP_CLAMP:
105 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
106 case PIPE_TEX_WRAP_MIRROR_REPEAT:
107 case PIPE_TEX_WRAP_MIRROR_CLAMP:
108 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
109 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
110 default:
111 assert(0);
112 }
113
114 lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
115 out_offset, out_i);
116 }
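/*
 * Illustrative sketch only (not generated code): the per-lane arithmetic
 * the REPEAT and CLAMP_TO_EDGE cases above boil down to, for a
 * power-of-two texture with length = 8 (length_minus_one = 7):
 *
 *    REPEAT:          11 & 7 -> 3,  and -3 & 7 -> 5, so negative scaled
 *                     coords wrap correctly as well
 *    CLAMP_TO_EDGE:   min(max(11, 0), 7) -> 7
 *
 * The wrapped coord is then conceptually split by
 * lp_build_sample_partial_offset() into a byte offset
 * (coord / block_length * stride) and the sub-block pixel index
 * (coord % block_length).
 */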
117
118
119 /**
120 * Build LLVM code for texture coord wrapping, for linear filtering,
121 * for scaled integer texcoords.
122 * \param block_length is the length of the pixel block along the
123 * coordinate axis
124 * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size
125 * \param length the texture size along one dimension
126 * \param stride pixel stride along the coordinate axis (in bytes)
127 * \param is_pot if TRUE, length is a power of two
128 * \param wrap_mode one of PIPE_TEX_WRAP_x
129 * \param offset0 resulting relative offset for coord0
130 * \param offset1 resulting relative offset for coord0 + 1
131 * \param i0 resulting sub-block pixel coordinate for coord0
132 * \param i1 resulting sub-block pixel coordinate for coord0 + 1
133 */
134 static void
135 lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
136 unsigned block_length,
137 LLVMValueRef coord0,
138 LLVMValueRef length,
139 LLVMValueRef stride,
140 boolean is_pot,
141 unsigned wrap_mode,
142 LLVMValueRef *offset0,
143 LLVMValueRef *offset1,
144 LLVMValueRef *i0,
145 LLVMValueRef *i1)
146 {
147 struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
148 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
149 LLVMValueRef length_minus_one;
150 LLVMValueRef lmask, umask, mask;
151
152 if (block_length != 1) {
153 /*
154 * If the pixel block covers more than one pixel then there is no easy
155 * way to calculate offset1 relative to offset0. Instead, compute them
156 * independently.
157 */
158
159 LLVMValueRef coord1;
160
161 lp_build_sample_wrap_nearest_int(bld,
162 block_length,
163 coord0,
164 length,
165 stride,
166 is_pot,
167 wrap_mode,
168 offset0, i0);
169
170 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
171
172 lp_build_sample_wrap_nearest_int(bld,
173 block_length,
174 coord1,
175 length,
176 stride,
177 is_pot,
178 wrap_mode,
179 offset1, i1);
180
181 return;
182 }
183
184 /*
185 * Scalar pixels -- try to compute offset0 and offset1 with a single stride
186 * multiplication.
187 */
188
189 *i0 = uint_coord_bld->zero;
190 *i1 = uint_coord_bld->zero;
191
192 length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
193
194 switch(wrap_mode) {
195 case PIPE_TEX_WRAP_REPEAT:
196 if (is_pot) {
197 coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
198 }
199 else {
200 /* Signed remainder won't give the right results for negative
201 * dividends, but unsigned remainder does. */
202 coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
203 }
204
205 mask = lp_build_compare(bld->builder, int_coord_bld->type,
206 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
207
208 *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
209 *offset1 = LLVMBuildAnd(bld->builder,
210 lp_build_add(uint_coord_bld, *offset0, stride),
211 mask, "");
212 break;
213
214 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
215 lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
216 PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
217 umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
218 PIPE_FUNC_LESS, coord0, length_minus_one);
219
220 coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
221 coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
222
223 mask = LLVMBuildAnd(bld->builder, lmask, umask, "");
224
225 *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
226 *offset1 = lp_build_add(uint_coord_bld,
227 *offset0,
228 LLVMBuildAnd(bld->builder, stride, mask, ""));
229 break;
230
231 case PIPE_TEX_WRAP_CLAMP:
232 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
233 case PIPE_TEX_WRAP_MIRROR_REPEAT:
234 case PIPE_TEX_WRAP_MIRROR_CLAMP:
235 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
236 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
237 default:
238 assert(0);
239 *offset0 = uint_coord_bld->zero;
240 *offset1 = uint_coord_bld->zero;
241 break;
242 }
243 }
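/*
 * Illustrative sketch only: how the single-stride trick above behaves at
 * the texture edge for the scalar-pixel REPEAT case, with length = 8 and
 * stride = 4 bytes:
 *
 *    coord0 = 3:  mask = ~0 (3 != 7),  offset0 = 12, offset1 = 16
 *    coord0 = 7:  mask =  0 (7 == 7),  offset0 = 28, offset1 = (28+4) & 0 = 0
 *
 * i.e. the right neighbor of the last texel wraps back to texel 0 without
 * a second wrap computation.  CLAMP_TO_EDGE instead ANDs the mask with the
 * stride, so offset1 collapses onto offset0 at the edges and the edge
 * texel is effectively sampled twice.
 */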
244
245
246 /**
247 * Sample a single texture image with nearest sampling.
248 * If sampling a cube texture, r = cube face in [0,5].
249 * Return filtered color as two vectors of 16-bit fixed point values.
250 */
251 static void
252 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
253 LLVMValueRef width_vec,
254 LLVMValueRef height_vec,
255 LLVMValueRef depth_vec,
256 LLVMValueRef row_stride_vec,
257 LLVMValueRef img_stride_vec,
258 LLVMValueRef data_ptr,
259 LLVMValueRef s,
260 LLVMValueRef t,
261 LLVMValueRef r,
262 LLVMValueRef *colors_lo,
263 LLVMValueRef *colors_hi)
264 {
265 const int dims = texture_dims(bld->static_state->target);
266 LLVMBuilderRef builder = bld->builder;
267 struct lp_build_context i32, h16, u8n;
268 LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
269 LLVMValueRef i32_c8;
270 LLVMValueRef s_ipart, t_ipart, r_ipart;
271 LLVMValueRef x_stride;
272 LLVMValueRef x_offset, offset;
273 LLVMValueRef x_subcoord, y_subcoord, z_subcoord;
274
275 lp_build_context_init(&i32, builder, lp_type_int_vec(32));
276 lp_build_context_init(&h16, builder, lp_type_ufixed(16));
277 lp_build_context_init(&u8n, builder, lp_type_unorm(8));
278
279 i32_vec_type = lp_build_vec_type(i32.type);
280 h16_vec_type = lp_build_vec_type(h16.type);
281 u8n_vec_type = lp_build_vec_type(u8n.type);
282
283 if (bld->static_state->normalized_coords) {
284 /* s = s * width, t = t * height */
285 LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
286 LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
287 coord_vec_type, "");
288 s = lp_build_mul(&bld->coord_bld, s, fp_width);
289 if (dims >= 2) {
290 LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
291 coord_vec_type, "");
292 t = lp_build_mul(&bld->coord_bld, t, fp_height);
293 if (dims >= 3) {
294 LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
295 coord_vec_type, "");
296 r = lp_build_mul(&bld->coord_bld, r, fp_depth);
297 }
298 }
299 }
300
301 /* scale coords by 256 (8 fractional bits) */
302 s = lp_build_mul_imm(&bld->coord_bld, s, 256);
303 if (dims >= 2)
304 t = lp_build_mul_imm(&bld->coord_bld, t, 256);
305 if (dims >= 3)
306 r = lp_build_mul_imm(&bld->coord_bld, r, 256);
307
308 /* convert float to int */
309 s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
310 if (dims >= 2)
311 t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
312 if (dims >= 3)
313 r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
314
315 /* compute floor (shift right 8) */
316 i32_c8 = lp_build_const_int_vec(i32.type, 8);
317 s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
318 if (dims >= 2)
319 t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
320 if (dims >= 3)
321 r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
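/*
 * Worked example of the 8.8 fixed-point conversion above (illustrative
 * only): s = 2.75 texels scales to 2.75 * 256 = 704, converts to the
 * integer 704, and 704 >> 8 = 2.  The arithmetic (sign-preserving) shift
 * matters: s = -0.5 scales to -128 and -128 >> 8 = -1, i.e. floor(-0.5),
 * which the texcoord wrapping below relies on for negative coords.
 */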
322
323 /* get pixel, row, image strides */
324 x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
325 bld->format_desc->block.bits/8);
326
327 /* Do texcoord wrapping, compute texel offset */
328 lp_build_sample_wrap_nearest_int(bld,
329 bld->format_desc->block.width,
330 s_ipart, width_vec, x_stride,
331 bld->static_state->pot_width,
332 bld->static_state->wrap_s,
333 &x_offset, &x_subcoord);
334 offset = x_offset;
335 if (dims >= 2) {
336 LLVMValueRef y_offset;
337 lp_build_sample_wrap_nearest_int(bld,
338 bld->format_desc->block.height,
339 t_ipart, height_vec, row_stride_vec,
340 bld->static_state->pot_height,
341 bld->static_state->wrap_t,
342 &y_offset, &y_subcoord);
343 offset = lp_build_add(&bld->uint_coord_bld, offset, y_offset);
344 if (dims >= 3) {
345 LLVMValueRef z_offset;
346 lp_build_sample_wrap_nearest_int(bld,
347 1, /* block length (depth) */
348 r_ipart, depth_vec, img_stride_vec,
349 bld->static_state->pot_depth,
350 bld->static_state->wrap_r,
351 &z_offset, &z_subcoord);
352 offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
353 }
354 else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
355 LLVMValueRef z_offset;
356 /* The r coord is the cube face in [0,5] */
357 z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
358 offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
359 }
360 }
361
362 /*
363 * Fetch the pixels as 4 x 32bit (rgba order might differ):
364 *
365 * rgba0 rgba1 rgba2 rgba3
366 *
367 * bit cast them into 16 x u8
368 *
369 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
370 *
371 * unpack them into two 8 x i16:
372 *
373 * r0 g0 b0 a0 r1 g1 b1 a1
374 * r2 g2 b2 a2 r3 g3 b3 a3
375 *
376 * The higher 8 bits of the resulting elements will be zero.
377 */
378 {
379 LLVMValueRef rgba8;
380
381 if (util_format_is_rgba8_variant(bld->format_desc)) {
382 /*
383 * Given the format is a rgba8, just read the pixels as is,
384 * without any swizzling. Swizzling will be done later.
385 */
386 rgba8 = lp_build_gather(bld->builder,
387 bld->texel_type.length,
388 bld->format_desc->block.bits,
389 bld->texel_type.width,
390 data_ptr, offset);
391
392 rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
393 }
394 else {
395 rgba8 = lp_build_fetch_rgba_aos(bld->builder,
396 bld->format_desc,
397 u8n.type,
398 data_ptr, offset,
399 x_subcoord,
400 y_subcoord);
401 }
402
403 /* Expand one 4*rgba8 to two 2*rgba16 */
404 lp_build_unpack2(builder, u8n.type, h16.type,
405 rgba8,
406 colors_lo, colors_hi);
407 }
408 }
409
410
411 /**
412 * Sample a single texture image with (bi-)(tri-)linear sampling.
413 * Return filtered color as two vectors of 16-bit fixed point values.
414 */
415 static void
416 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
417 LLVMValueRef width_vec,
418 LLVMValueRef height_vec,
419 LLVMValueRef depth_vec,
420 LLVMValueRef row_stride_vec,
421 LLVMValueRef img_stride_vec,
422 LLVMValueRef data_ptr,
423 LLVMValueRef s,
424 LLVMValueRef t,
425 LLVMValueRef r,
426 LLVMValueRef *colors_lo,
427 LLVMValueRef *colors_hi)
428 {
429 const int dims = texture_dims(bld->static_state->target);
430 LLVMBuilderRef builder = bld->builder;
431 struct lp_build_context i32, h16, u8n;
432 LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
433 LLVMValueRef i32_c8, i32_c128, i32_c255;
434 LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
435 LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
436 LLVMValueRef r_ipart, r_fpart, r_fpart_lo, r_fpart_hi;
437 LLVMValueRef x_stride, y_stride, z_stride;
438 LLVMValueRef x_offset0, x_offset1;
439 LLVMValueRef y_offset0, y_offset1;
440 LLVMValueRef z_offset0, z_offset1;
441 LLVMValueRef offset[2][2][2]; /* [z][y][x] */
442 LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
443 LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */
444 LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */
445 LLVMValueRef packed_lo, packed_hi;
446 unsigned x, y, z;
447 unsigned i, j, k;
448 unsigned numj, numk;
449
450 lp_build_context_init(&i32, builder, lp_type_int_vec(32));
451 lp_build_context_init(&h16, builder, lp_type_ufixed(16));
452 lp_build_context_init(&u8n, builder, lp_type_unorm(8));
453
454 i32_vec_type = lp_build_vec_type(i32.type);
455 h16_vec_type = lp_build_vec_type(h16.type);
456 u8n_vec_type = lp_build_vec_type(u8n.type);
457
458 if (bld->static_state->normalized_coords) {
459 /* s = s * width, t = t * height */
460 LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
461 LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
462 coord_vec_type, "");
463 s = lp_build_mul(&bld->coord_bld, s, fp_width);
464 if (dims >= 2) {
465 LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
466 coord_vec_type, "");
467 t = lp_build_mul(&bld->coord_bld, t, fp_height);
468 }
469 if (dims >= 3) {
470 LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
471 coord_vec_type, "");
472 r = lp_build_mul(&bld->coord_bld, r, fp_depth);
473 }
474 }
475
476 /* scale coords by 256 (8 fractional bits) */
477 s = lp_build_mul_imm(&bld->coord_bld, s, 256);
478 if (dims >= 2)
479 t = lp_build_mul_imm(&bld->coord_bld, t, 256);
480 if (dims >= 3)
481 r = lp_build_mul_imm(&bld->coord_bld, r, 256);
482
483 /* convert float to int */
484 s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
485 if (dims >= 2)
486 t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
487 if (dims >= 3)
488 r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
489
490 /* subtract 0.5 (add -128) */
491 i32_c128 = lp_build_const_int_vec(i32.type, -128);
492 s = LLVMBuildAdd(builder, s, i32_c128, "");
493 if (dims >= 2) {
494 t = LLVMBuildAdd(builder, t, i32_c128, "");
495 }
496 if (dims >= 3) {
497 r = LLVMBuildAdd(builder, r, i32_c128, "");
498 }
499
500 /* compute floor (shift right 8) */
501 i32_c8 = lp_build_const_int_vec(i32.type, 8);
502 s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
503 if (dims >= 2)
504 t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
505 if (dims >= 3)
506 r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
507
508 /* compute fractional part (AND with 0xff) */
509 i32_c255 = lp_build_const_int_vec(i32.type, 255);
510 s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
511 if (dims >= 2)
512 t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
513 if (dims >= 3)
514 r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
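/*
 * Worked example of the decomposition above (illustrative only): for
 * s = 2.25 texels the scaled value is 576; adding -128 (the -0.5
 * texel-center adjustment) gives 448, so
 *
 *    s_ipart = 448 >> 8   = 1     (left neighbor texel)
 *    s_fpart = 448 & 0xff = 192   (0.75 in 8.8 -- the lerp weight)
 *
 * which matches linear filtering between texels 1 and 2 with weight 0.75.
 */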
515
516 /* get pixel, row and image strides */
517 x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
518 bld->format_desc->block.bits/8);
519 y_stride = row_stride_vec;
520 z_stride = img_stride_vec;
521
522 /* do texcoord wrapping and compute texel offsets */
523 lp_build_sample_wrap_linear_int(bld,
524 bld->format_desc->block.width,
525 s_ipart, width_vec, x_stride,
526 bld->static_state->pot_width,
527 bld->static_state->wrap_s,
528 &x_offset0, &x_offset1,
529 &x_subcoord[0], &x_subcoord[1]);
530 for (z = 0; z < 2; z++) {
531 for (y = 0; y < 2; y++) {
532 offset[z][y][0] = x_offset0;
533 offset[z][y][1] = x_offset1;
534 }
535 }
536
537 if (dims >= 2) {
538 lp_build_sample_wrap_linear_int(bld,
539 bld->format_desc->block.height,
540 t_ipart, height_vec, y_stride,
541 bld->static_state->pot_height,
542 bld->static_state->wrap_t,
543 &y_offset0, &y_offset1,
544 &y_subcoord[0], &y_subcoord[1]);
545
546 for (z = 0; z < 2; z++) {
547 for (x = 0; x < 2; x++) {
548 offset[z][0][x] = lp_build_add(&bld->uint_coord_bld,
549 offset[z][0][x], y_offset0);
550 offset[z][1][x] = lp_build_add(&bld->uint_coord_bld,
551 offset[z][1][x], y_offset1);
552 }
553 }
554 }
555
556 if (dims >= 3) {
557 lp_build_sample_wrap_linear_int(bld,
558 1, /* block length (depth) */
559 r_ipart, depth_vec, z_stride,
560 bld->static_state->pot_depth,
561 bld->static_state->wrap_r,
562 &z_offset0, &z_offset1,
563 &z_subcoord[0], &z_subcoord[1]);
564 for (y = 0; y < 2; y++) {
565 for (x = 0; x < 2; x++) {
566 offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
567 offset[0][y][x], z_offset0);
568 offset[1][y][x] = lp_build_add(&bld->uint_coord_bld,
569 offset[1][y][x], z_offset1);
570 }
571 }
572 }
573 else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
574 LLVMValueRef z_offset;
575 z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
576 for (y = 0; y < 2; y++) {
577 for (x = 0; x < 2; x++) {
578 /* The r coord is the cube face in [0,5] */
579 offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
580 offset[0][y][x], z_offset);
581 }
582 }
583 }
584
585 /*
586 * Transform 4 x i32 in
587 *
588 * s_fpart = {s0, s1, s2, s3}
589 *
590 * into 8 x i16
591 *
592 * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
593 *
594 * into two 8 x i16
595 *
596 * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
597 * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
598 *
599 * and likewise for t_fpart. There is no risk of losing precision here
600 * since the fractional parts only use the lower 8 bits.
601 */
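/*
 * Concretely, for a 128-bit vector (h16.type.length == 8) on a little
 * endian target the loop below builds the constant shuffle masks
 *
 *    shuffle_lo = {0, 0, 0, 0, 2, 2, 2, 2}
 *    shuffle_hi = {4, 4, 4, 4, 6, 6, 6, 6}
 *
 * which pick the low 16-bit half of each original i32 lane (the half
 * holding the 8-bit fraction) and replicate it across the four channels
 * of the corresponding pixel.
 */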
602 s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
603 if (dims >= 2)
604 t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
605 if (dims >= 3)
606 r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, "");
607
608 {
609 LLVMTypeRef elem_type = LLVMInt32Type();
610 LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
611 LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
612 LLVMValueRef shuffle_lo;
613 LLVMValueRef shuffle_hi;
614
615 for (j = 0; j < h16.type.length; j += 4) {
616 #ifdef PIPE_ARCH_LITTLE_ENDIAN
617 unsigned subindex = 0;
618 #else
619 unsigned subindex = 1;
620 #endif
621 LLVMValueRef index;
622
623 index = LLVMConstInt(elem_type, j/2 + subindex, 0);
624 for (i = 0; i < 4; ++i)
625 shuffles_lo[j + i] = index;
626
627 index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
628 for (i = 0; i < 4; ++i)
629 shuffles_hi[j + i] = index;
630 }
631
632 shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
633 shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
634
635 s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
636 shuffle_lo, "");
637 s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
638 shuffle_hi, "");
639 if (dims >= 2) {
640 t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
641 shuffle_lo, "");
642 t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
643 shuffle_hi, "");
644 }
645 if (dims >= 3) {
646 r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
647 shuffle_lo, "");
648 r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
649 shuffle_hi, "");
650 }
651 }
652
653 /*
654 * Fetch the pixels as 4 x 32bit (rgba order might differ):
655 *
656 * rgba0 rgba1 rgba2 rgba3
657 *
658 * bit cast them into 16 x u8
659 *
660 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
661 *
662 * unpack them into two 8 x i16:
663 *
664 * r0 g0 b0 a0 r1 g1 b1 a1
665 * r2 g2 b2 a2 r3 g3 b3 a3
666 *
667 * The higher 8 bits of the resulting elements will be zero.
668 */
669 numj = 1 + (dims >= 2);
670 numk = 1 + (dims >= 3);
671
672 for (k = 0; k < numk; k++) {
673 for (j = 0; j < numj; j++) {
674 for (i = 0; i < 2; i++) {
675 LLVMValueRef rgba8;
676
677 if (util_format_is_rgba8_variant(bld->format_desc)) {
678 /*
679 * Given the format is a rgba8, just read the pixels as is,
680 * without any swizzling. Swizzling will be done later.
681 */
682 rgba8 = lp_build_gather(bld->builder,
683 bld->texel_type.length,
684 bld->format_desc->block.bits,
685 bld->texel_type.width,
686 data_ptr, offset[k][j][i]);
687
688 rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
689 }
690 else {
691 rgba8 = lp_build_fetch_rgba_aos(bld->builder,
692 bld->format_desc,
693 u8n.type,
694 data_ptr, offset[k][j][i],
695 x_subcoord[i],
696 y_subcoord[j]);
697 }
698
699 /* Expand one 4*rgba8 to two 2*rgba16 */
700 lp_build_unpack2(builder, u8n.type, h16.type,
701 rgba8,
702 &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]);
703 }
704 }
705 }
706
707 /*
708 * Linear interpolation with 8.8 fixed point.
709 */
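/*
 * Each lp_build_lerp() below computes, per 16-bit channel and with the
 * weight in [0,256) (8.8 fixed point), roughly
 *
 *    lerp(w, a, b) = a + ((w * (b - a)) >> 8)
 *
 * and lp_build_lerp_2d() is the bilinear composition
 * lerp(t, lerp(s, tl, tr), lerp(s, bl, br)).  This is a sketch only; the
 * exact rounding is whatever lp_bld_arit.c implements.
 */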
710 if (dims == 1) {
711 /* 1-D lerp */
712 packed_lo = lp_build_lerp(&h16,
713 s_fpart_lo,
714 neighbors_lo[0][0][0],
715 neighbors_lo[0][0][1]);
716
717 packed_hi = lp_build_lerp(&h16,
718 s_fpart_hi,
719 neighbors_hi[0][0][0],
720 neighbors_hi[0][0][1]);
721 }
722 else {
723 /* 2-D lerp */
724 packed_lo = lp_build_lerp_2d(&h16,
725 s_fpart_lo, t_fpart_lo,
726 neighbors_lo[0][0][0],
727 neighbors_lo[0][0][1],
728 neighbors_lo[0][1][0],
729 neighbors_lo[0][1][1]);
730
731 packed_hi = lp_build_lerp_2d(&h16,
732 s_fpart_hi, t_fpart_hi,
733 neighbors_hi[0][0][0],
734 neighbors_hi[0][0][1],
735 neighbors_hi[0][1][0],
736 neighbors_hi[0][1][1]);
737
738 if (dims >= 3) {
739 LLVMValueRef packed_lo2, packed_hi2;
740
741 /* lerp in the second z slice */
742 packed_lo2 = lp_build_lerp_2d(&h16,
743 s_fpart_lo, t_fpart_lo,
744 neighbors_lo[1][0][0],
745 neighbors_lo[1][0][1],
746 neighbors_lo[1][1][0],
747 neighbors_lo[1][1][1]);
748
749 packed_hi2 = lp_build_lerp_2d(&h16,
750 s_fpart_hi, t_fpart_hi,
751 neighbors_hi[1][0][0],
752 neighbors_hi[1][0][1],
753 neighbors_hi[1][1][0],
754 neighbors_hi[1][1][1]);
755 /* interp between two z slices */
756 packed_lo = lp_build_lerp(&h16, r_fpart_lo,
757 packed_lo, packed_lo2);
758 packed_hi = lp_build_lerp(&h16, r_fpart_hi,
759 packed_hi, packed_hi2);
760 }
761 }
762
763 *colors_lo = packed_lo;
764 *colors_hi = packed_hi;
765 }
766
767
768 /**
769 * Sample the texture/mipmap using given image filter and mip filter.
770 * data_ptr0 and data_ptr1 point to the two mipmap levels to sample
771 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
772 * If we're using nearest miplevel sampling, the '1' values will be null/unused.
773 */
774 static void
775 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
776 unsigned img_filter,
777 unsigned mip_filter,
778 LLVMValueRef s,
779 LLVMValueRef t,
780 LLVMValueRef r,
781 LLVMValueRef lod_fpart,
782 LLVMValueRef width0_vec,
783 LLVMValueRef width1_vec,
784 LLVMValueRef height0_vec,
785 LLVMValueRef height1_vec,
786 LLVMValueRef depth0_vec,
787 LLVMValueRef depth1_vec,
788 LLVMValueRef row_stride0_vec,
789 LLVMValueRef row_stride1_vec,
790 LLVMValueRef img_stride0_vec,
791 LLVMValueRef img_stride1_vec,
792 LLVMValueRef data_ptr0,
793 LLVMValueRef data_ptr1,
794 LLVMValueRef *colors_lo,
795 LLVMValueRef *colors_hi)
796 {
797 LLVMValueRef colors0_lo, colors0_hi;
798 LLVMValueRef colors1_lo, colors1_hi;
799
800 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
801 /* sample the first mipmap level */
802 lp_build_sample_image_nearest(bld,
803 width0_vec, height0_vec, depth0_vec,
804 row_stride0_vec, img_stride0_vec,
805 data_ptr0, s, t, r,
806 &colors0_lo, &colors0_hi);
807
808 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
809 /* sample the second mipmap level */
810 lp_build_sample_image_nearest(bld,
811 width1_vec, height1_vec, depth1_vec,
812 row_stride1_vec, img_stride1_vec,
813 data_ptr1, s, t, r,
814 &colors1_lo, &colors1_hi);
815 }
816 }
817 else {
818 assert(img_filter == PIPE_TEX_FILTER_LINEAR);
819
820 /* sample the first mipmap level */
821 lp_build_sample_image_linear(bld,
822 width0_vec, height0_vec, depth0_vec,
823 row_stride0_vec, img_stride0_vec,
824 data_ptr0, s, t, r,
825 &colors0_lo, &colors0_hi);
826
827 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
828 /* sample the second mipmap level */
829 lp_build_sample_image_linear(bld,
830 width1_vec, height1_vec, depth1_vec,
831 row_stride1_vec, img_stride1_vec,
832 data_ptr1, s, t, r,
833 &colors1_lo, &colors1_hi);
834 }
835 }
836
837 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
838 /* interpolate samples from the two mipmap levels */
839 struct lp_build_context h16;
840 lp_build_context_init(&h16, bld->builder, lp_type_ufixed(16));
841
842 *colors_lo = lp_build_lerp(&h16, lod_fpart,
843 colors0_lo, colors1_lo);
844 *colors_hi = lp_build_lerp(&h16, lod_fpart,
845 colors0_hi, colors1_hi);
846 }
847 else {
848 /* use first/only level's colors */
849 *colors_lo = colors0_lo;
850 *colors_hi = colors0_hi;
851 }
852 }
853
854
855
856 /**
857 * Texture sampling in AoS format. Used when sampling common 32-bit/texel
858 * formats. 1D/2D/3D/cube textures are supported, as are all mipmap
859 * sampling modes, but only a limited set of texture coord wrap modes.
860 */
861 void
862 lp_build_sample_aos(struct lp_build_sample_context *bld,
863 unsigned unit,
864 LLVMValueRef s,
865 LLVMValueRef t,
866 LLVMValueRef r,
867 const LLVMValueRef *ddx,
868 const LLVMValueRef *ddy,
869 LLVMValueRef lod_bias, /* optional */
870 LLVMValueRef explicit_lod, /* optional */
871 LLVMValueRef width,
872 LLVMValueRef height,
873 LLVMValueRef depth,
874 LLVMValueRef width_vec,
875 LLVMValueRef height_vec,
876 LLVMValueRef depth_vec,
877 LLVMValueRef row_stride_array,
878 LLVMValueRef img_stride_array,
879 LLVMValueRef data_array,
880 LLVMValueRef texel_out[4])
881 {
882 struct lp_build_context *float_bld = &bld->float_bld;
883 LLVMBuilderRef builder = bld->builder;
884 const unsigned mip_filter = bld->static_state->min_mip_filter;
885 const unsigned min_filter = bld->static_state->min_img_filter;
886 const unsigned mag_filter = bld->static_state->mag_img_filter;
887 const int dims = texture_dims(bld->static_state->target);
888 LLVMValueRef lod = NULL, lod_fpart = NULL;
889 LLVMValueRef ilevel0, ilevel1 = NULL;
890 LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
891 LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
892 LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
893 LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
894 LLVMValueRef data_ptr0, data_ptr1 = NULL;
895 LLVMValueRef packed, packed_lo, packed_hi;
896 LLVMValueRef unswizzled[4];
897 LLVMValueRef face_ddx[4], face_ddy[4];
898 struct lp_build_context h16;
899 LLVMTypeRef h16_vec_type;
900
901 /* we only support the common/simple wrap modes at this time */
902 assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
903 if (dims >= 2)
904 assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t));
905 if (dims >= 3)
906 assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r));
907
908
909 /* make 16-bit fixed-pt builder context */
910 lp_build_context_init(&h16, builder, lp_type_ufixed(16));
911 h16_vec_type = lp_build_vec_type(h16.type);
912
913
914 /* cube face selection, compute pre-face coords, etc. */
915 if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
916 LLVMValueRef face, face_s, face_t;
917 lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
918 s = face_s; /* vec */
919 t = face_t; /* vec */
920 /* use 'r' to indicate cube face */
921 r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
922
923 /* recompute ddx, ddy using the new (s,t) face texcoords */
924 face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
925 face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
926 face_ddx[2] = NULL;
927 face_ddx[3] = NULL;
928 face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
929 face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
930 face_ddy[2] = NULL;
931 face_ddy[3] = NULL;
932 ddx = face_ddx;
933 ddy = face_ddy;
934 }
935
936
937 /*
938 * Compute the level of detail (float).
939 */
940 if (min_filter != mag_filter ||
941 mip_filter != PIPE_TEX_MIPFILTER_NONE) {
942 /* Need to compute lod either to choose mipmap levels or to
943 * distinguish between minification/magnification with one mipmap level.
944 */
945 lod = lp_build_lod_selector(bld, unit, ddx, ddy,
946 lod_bias, explicit_lod,
947 width, height, depth);
948 }
949
950 /*
951 * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
952 * If mipfilter=linear, also compute the weight between the two
953 * mipmap levels: lod_fpart
954 */
955 switch (mip_filter) {
956 default:
957 assert(0 && "bad mip_filter value in lp_build_sample_aos()");
958 /* fall-through */
959 case PIPE_TEX_MIPFILTER_NONE:
960 /* always use mip level 0 */
961 if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
962 /* XXX this is a work-around for an apparent bug in LLVM 2.7.
963 * We should be able to set ilevel0 = const(0) but that causes
964 * bad x86 code to be emitted.
965 */
966 lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
967 lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
968 }
969 else {
970 ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
971 }
972 break;
973 case PIPE_TEX_MIPFILTER_NEAREST:
974 assert(lod);
975 lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
976 break;
977 case PIPE_TEX_MIPFILTER_LINEAR:
978 {
979 LLVMValueRef f256 = LLVMConstReal(LLVMFloatType(), 256.0);
980 LLVMValueRef i255 = lp_build_const_int32(255);
981 LLVMTypeRef i16_type = LLVMIntType(16);
982
983 assert(lod);
984
985 lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
986 &lod_fpart);
987 lod_fpart = LLVMBuildFMul(builder, lod_fpart, f256, "");
988 lod_fpart = lp_build_ifloor(&bld->float_bld, lod_fpart);
989 lod_fpart = LLVMBuildAnd(builder, lod_fpart, i255, "");
990 lod_fpart = LLVMBuildTrunc(builder, lod_fpart, i16_type, "");
991 lod_fpart = lp_build_broadcast_scalar(&h16, lod_fpart);
992
993 /* the lod_fpart values will be fixed pt values in [0,1) */
994 }
995 break;
996 }
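/*
 * Worked example for the PIPE_TEX_MIPFILTER_LINEAR case above
 * (illustrative only): a fractional lod of 0.25 scales to 64.0, floors to
 * 64, is masked and truncated to a 16-bit 64 and broadcast, so the mip
 * lerp further below blends the two levels with weight 64/256 = 0.25,
 * using the same 8.8 fixed-point lerp as the texel filtering.
 */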
997
998 /* compute image size(s) of source mipmap level(s) */
999 lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec,
1000 ilevel0, ilevel1,
1001 row_stride_array, img_stride_array,
1002 &width0_vec, &width1_vec,
1003 &height0_vec, &height1_vec,
1004 &depth0_vec, &depth1_vec,
1005 &row_stride0_vec, &row_stride1_vec,
1006 &img_stride0_vec, &img_stride1_vec);
1007
1008 /*
1009 * Get pointer(s) to image data for mipmap level(s).
1010 */
1011 data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
1012 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1013 data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
1014 }
1015
1016
1017 /*
1018 * Get/interpolate texture colors.
1019 */
1020 if (min_filter == mag_filter) {
1021 /* no need to distinguish between minification and magnification */
1022 lp_build_sample_mipmap(bld, min_filter, mip_filter,
1023 s, t, r, lod_fpart,
1024 width0_vec, width1_vec,
1025 height0_vec, height1_vec,
1026 depth0_vec, depth1_vec,
1027 row_stride0_vec, row_stride1_vec,
1028 img_stride0_vec, img_stride1_vec,
1029 data_ptr0, data_ptr1,
1030 &packed_lo, &packed_hi);
1031 }
1032 else {
1033 /* Emit conditional to choose min image filter or mag image filter
1034 * depending on the lod being >= 0 or < 0, respectively.
1035 */
1036 struct lp_build_flow_context *flow_ctx;
1037 struct lp_build_if_state if_ctx;
1038 LLVMValueRef minify;
1039
1040 flow_ctx = lp_build_flow_create(builder);
1041 lp_build_flow_scope_begin(flow_ctx);
1042
1043 packed_lo = LLVMGetUndef(h16_vec_type);
1044 packed_hi = LLVMGetUndef(h16_vec_type);
1045
1046 lp_build_flow_scope_declare(flow_ctx, &packed_lo);
1047 lp_build_flow_scope_declare(flow_ctx, &packed_hi);
1048
1049 /* minify = lod >= 0.0 */
1050 minify = LLVMBuildFCmp(builder, LLVMRealUGE,
1051 lod, float_bld->zero, "");
1052
1053 lp_build_if(&if_ctx, flow_ctx, builder, minify);
1054 {
1055 /* Use the minification filter */
1056 lp_build_sample_mipmap(bld, min_filter, mip_filter,
1057 s, t, r, lod_fpart,
1058 width0_vec, width1_vec,
1059 height0_vec, height1_vec,
1060 depth0_vec, depth1_vec,
1061 row_stride0_vec, row_stride1_vec,
1062 img_stride0_vec, img_stride1_vec,
1063 data_ptr0, data_ptr1,
1064 &packed_lo, &packed_hi);
1065 }
1066 lp_build_else(&if_ctx);
1067 {
1068 /* Use the magnification filter */
1069 lp_build_sample_mipmap(bld, mag_filter, mip_filter,
1070 s, t, r, lod_fpart,
1071 width0_vec, width1_vec,
1072 height0_vec, height1_vec,
1073 depth0_vec, depth1_vec,
1074 row_stride0_vec, row_stride1_vec,
1075 img_stride0_vec, img_stride1_vec,
1076 data_ptr0, data_ptr1,
1077 &packed_lo, &packed_hi);
1078 }
1079 lp_build_endif(&if_ctx);
1080
1081 lp_build_flow_scope_end(flow_ctx);
1082 lp_build_flow_destroy(flow_ctx);
1083 }
1084
1085 /* combine 'packed_lo', 'packed_hi' into 'packed' */
1086 {
1087 struct lp_build_context h16, u8n;
1088
1089 lp_build_context_init(&h16, builder, lp_type_ufixed(16));
1090 lp_build_context_init(&u8n, builder, lp_type_unorm(8));
1091
1092 packed = lp_build_pack2(builder, h16.type, u8n.type,
1093 packed_lo, packed_hi);
1094 }
1095
1096 /*
1097 * Convert to SoA and swizzle.
1098 */
1099 lp_build_rgba8_to_f32_soa(builder,
1100 bld->texel_type,
1101 packed, unswizzled);
1102
1103 if (util_format_is_rgba8_variant(bld->format_desc)) {
1104 lp_build_format_swizzle_soa(bld->format_desc,
1105 &bld->texel_bld,
1106 unswizzled, texel_out);
1107 }
1108 else {
1109 texel_out[0] = unswizzled[0];
1110 texel_out[1] = unswizzled[1];
1111 texel_out[2] = unswizzled[2];
1112 texel_out[3] = unswizzled[3];
1113 }
1114
1115 apply_sampler_swizzle(bld, texel_out);
1116 }