gallivm: Remove unnecessary headers.
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_aos.c
/**************************************************************************
 *
 * Copyright 2010 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Texture sampling -- AoS.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 * @author Brian Paul <brianp@vmware.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_debug.h"
#include "util/u_dump.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_format.h"
#include "util/u_cpu_detect.h"
#include "lp_bld_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_arit.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_pack.h"
#include "lp_bld_flow.h"
#include "lp_bld_gather.h"
#include "lp_bld_format.h"
#include "lp_bld_sample.h"
#include "lp_bld_sample_aos.h"
#include "lp_bld_quad.h"


/**
 * Build LLVM code for texture coord wrapping, for nearest filtering,
 * for scaled integer texcoords.
 * \param block_length is the length of the pixel block along the
 *                     coordinate axis
 * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size
 * \param length the texture size along one dimension
 * \param stride pixel stride along the coordinate axis (in bytes)
 * \param is_pot if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \param out_offset byte offset for the wrapped coordinate
 * \param out_i resulting sub-block pixel coordinate for coord
 */
static void
lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
                                 unsigned block_length,
                                 LLVMValueRef coord,
                                 LLVMValueRef length,
                                 LLVMValueRef stride,
                                 boolean is_pot,
                                 unsigned wrap_mode,
                                 LLVMValueRef *out_offset,
                                 LLVMValueRef *out_i)
{
   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMValueRef length_minus_one;

   length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if(is_pot)
         coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
      else
         /* Signed remainder won't give the right results for negative
          * dividends but unsigned remainder does. */
         coord = LLVMBuildURem(bld->builder, coord, length, "");
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
      coord = lp_build_min(int_coord_bld, coord, length_minus_one);
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      assert(0);
   }

   lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
                                  out_offset, out_i);
}


/**
 * Build LLVM code for texture coord wrapping, for linear filtering,
 * for scaled integer texcoords.
 * \param block_length is the length of the pixel block along the
 *                     coordinate axis
 * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size
 * \param length the texture size along one dimension
 * \param stride pixel stride along the coordinate axis (in bytes)
 * \param is_pot if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \param offset0 resulting relative offset for coord0
 * \param offset1 resulting relative offset for coord0 + 1
 * \param i0 resulting sub-block pixel coordinate for coord0
 * \param i1 resulting sub-block pixel coordinate for coord0 + 1
 */
static void
lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
                                unsigned block_length,
                                LLVMValueRef coord0,
                                LLVMValueRef length,
                                LLVMValueRef stride,
                                boolean is_pot,
                                unsigned wrap_mode,
                                LLVMValueRef *offset0,
                                LLVMValueRef *offset1,
                                LLVMValueRef *i0,
                                LLVMValueRef *i1)
{
   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMValueRef length_minus_one;
   LLVMValueRef lmask, umask, mask;

   if (block_length != 1) {
      /*
       * If the pixel block covers more than one pixel then there is no easy
       * way to calculate offset1 relative to offset0. Instead, compute them
       * independently.
       */

      LLVMValueRef coord1;

      lp_build_sample_wrap_nearest_int(bld,
                                       block_length,
                                       coord0,
                                       length,
                                       stride,
                                       is_pot,
                                       wrap_mode,
                                       offset0, i0);

      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);

      lp_build_sample_wrap_nearest_int(bld,
                                       block_length,
                                       coord1,
                                       length,
                                       stride,
                                       is_pot,
                                       wrap_mode,
                                       offset1, i1);

      return;
   }

   /*
    * Scalar pixels -- try to compute offset0 and offset1 with a single stride
    * multiplication.
    */

   *i0 = uint_coord_bld->zero;
   *i1 = uint_coord_bld->zero;

   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
      }
      else {
         /* Signed remainder won't give the right results for negative
          * dividends but unsigned remainder does. */
         coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
      }

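      /* coord0 now lies in [0, length-1].  The mask below is all ones when
       * coord0 != length-1 and all zeroes at the last texel, so offset1
       * computed as (offset0 + stride) & mask wraps back to byte offset 0
       * (texel 0), which is where REPEAT should take its second tap.
       */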
      mask = lp_build_compare(bld->builder, int_coord_bld->type,
                              PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);

      *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
      *offset1 = LLVMBuildAnd(bld->builder,
                              lp_build_add(uint_coord_bld, *offset0, stride),
                              mask, "");
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
                               PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
      umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
                               PIPE_FUNC_LESS, coord0, length_minus_one);

      coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
      coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);

      mask = LLVMBuildAnd(bld->builder, lmask, umask, "");

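      /* mask is all ones only when coord0 was strictly inside [0, length-1).
       * At (or beyond) either edge, stride & mask is zero, so offset1 ends
       * up equal to offset0 and both taps read the clamped edge texel.
       */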
      *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
      *offset1 = lp_build_add(uint_coord_bld,
                              *offset0,
                              LLVMBuildAnd(bld->builder, stride, mask, ""));
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      assert(0);
      *offset0 = uint_coord_bld->zero;
      *offset1 = uint_coord_bld->zero;
      break;
   }
}


/**
 * Sample a single texture image with nearest sampling.
 * If sampling a cube texture, r = cube face in [0,5].
 * Return filtered color as two vectors of 16-bit fixed point values.
 */
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                              LLVMValueRef width_vec,
                              LLVMValueRef height_vec,
                              LLVMValueRef depth_vec,
                              LLVMValueRef row_stride_vec,
                              LLVMValueRef img_stride_vec,
                              LLVMValueRef data_ptr,
                              LLVMValueRef s,
                              LLVMValueRef t,
                              LLVMValueRef r,
                              LLVMValueRef *colors_lo,
                              LLVMValueRef *colors_hi)
{
   const int dims = texture_dims(bld->static_state->target);
   LLVMBuilderRef builder = bld->builder;
   struct lp_build_context i32, h16, u8n;
   LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
   LLVMValueRef i32_c8;
   LLVMValueRef s_ipart, t_ipart, r_ipart;
   LLVMValueRef x_stride;
   LLVMValueRef x_offset, offset;
   LLVMValueRef x_subcoord, y_subcoord, z_subcoord;

   lp_build_context_init(&i32, builder, lp_type_int_vec(32));
   lp_build_context_init(&h16, builder, lp_type_ufixed(16));
   lp_build_context_init(&u8n, builder, lp_type_unorm(8));

   i32_vec_type = lp_build_vec_type(i32.type);
   h16_vec_type = lp_build_vec_type(h16.type);
   u8n_vec_type = lp_build_vec_type(u8n.type);

   if (bld->static_state->normalized_coords) {
      /* s = s * width, t = t * height */
      LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
      LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
                                              coord_vec_type, "");
      s = lp_build_mul(&bld->coord_bld, s, fp_width);
      if (dims >= 2) {
         LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
                                                  coord_vec_type, "");
         t = lp_build_mul(&bld->coord_bld, t, fp_height);
         if (dims >= 3) {
            LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
                                                    coord_vec_type, "");
            r = lp_build_mul(&bld->coord_bld, r, fp_depth);
         }
      }
   }

   /* scale coords by 256 (8 fractional bits) */
   s = lp_build_mul_imm(&bld->coord_bld, s, 256);
   if (dims >= 2)
      t = lp_build_mul_imm(&bld->coord_bld, t, 256);
   if (dims >= 3)
      r = lp_build_mul_imm(&bld->coord_bld, r, 256);

   /* convert float to int */
   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
   if (dims >= 2)
      t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
   if (dims >= 3)
      r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");

   /* compute floor (shift right 8) */
   i32_c8 = lp_build_const_int_vec(i32.type, 8);
   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   if (dims >= 2)
      t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
   if (dims >= 3)
      r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
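   /* Note the arithmetic shift rounds toward negative infinity, so it is
    * a true floor() of the 8.8 fixed-point coord even when it is negative:
    * e.g. s = -1.5 scales to -384, and -384 >> 8 = -2 = floor(-1.5).
    */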

   /* get pixel, row, image strides */
   x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
                                 bld->format_desc->block.bits/8);

   /* Do texcoord wrapping, compute texel offset */
   lp_build_sample_wrap_nearest_int(bld,
                                    bld->format_desc->block.width,
                                    s_ipart, width_vec, x_stride,
                                    bld->static_state->pot_width,
                                    bld->static_state->wrap_s,
                                    &x_offset, &x_subcoord);
   offset = x_offset;
   if (dims >= 2) {
      LLVMValueRef y_offset;
      lp_build_sample_wrap_nearest_int(bld,
                                       bld->format_desc->block.height,
                                       t_ipart, height_vec, row_stride_vec,
                                       bld->static_state->pot_height,
                                       bld->static_state->wrap_t,
                                       &y_offset, &y_subcoord);
      offset = lp_build_add(&bld->uint_coord_bld, offset, y_offset);
      if (dims >= 3) {
         LLVMValueRef z_offset;
         lp_build_sample_wrap_nearest_int(bld,
                                          1, /* block length (depth) */
                                          r_ipart, depth_vec, img_stride_vec,
                                          bld->static_state->pot_depth,
                                          bld->static_state->wrap_r,
                                          &z_offset, &z_subcoord);
         offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
      }
      else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
         LLVMValueRef z_offset;
         /* The r coord is the cube face in [0,5] */
         z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
         offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
      }
   }

   /*
    * Fetch the pixels as 4 x 32bit (rgba order might differ):
    *
    *   rgba0 rgba1 rgba2 rgba3
    *
    * bit cast them into 16 x u8
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    *
    * unpack them into two 8 x i16:
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1
    *   r2 g2 b2 a2 r3 g3 b3 a3
    *
    * The higher 8 bits of the resulting elements will be zero.
    */
   {
      LLVMValueRef rgba8;

      if (util_format_is_rgba8_variant(bld->format_desc)) {
         /*
          * Given the format is a rgba8, just read the pixels as is,
          * without any swizzling. Swizzling will be done later.
          */
         rgba8 = lp_build_gather(bld->builder,
                                 bld->texel_type.length,
                                 bld->format_desc->block.bits,
                                 bld->texel_type.width,
                                 data_ptr, offset);

         rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
      }
      else {
         rgba8 = lp_build_fetch_rgba_aos(bld->builder,
                                         bld->format_desc,
                                         u8n.type,
                                         data_ptr, offset,
                                         x_subcoord,
                                         y_subcoord);
      }

      /* Expand one 4*rgba8 to two 2*rgba16 */
      lp_build_unpack2(builder, u8n.type, h16.type,
                       rgba8,
                       colors_lo, colors_hi);
   }
}


/**
 * Sample a single texture image with (bi-)(tri-)linear sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 */
static void
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                             LLVMValueRef width_vec,
                             LLVMValueRef height_vec,
                             LLVMValueRef depth_vec,
                             LLVMValueRef row_stride_vec,
                             LLVMValueRef img_stride_vec,
                             LLVMValueRef data_ptr,
                             LLVMValueRef s,
                             LLVMValueRef t,
                             LLVMValueRef r,
                             LLVMValueRef *colors_lo,
                             LLVMValueRef *colors_hi)
{
   const int dims = texture_dims(bld->static_state->target);
   LLVMBuilderRef builder = bld->builder;
   struct lp_build_context i32, h16, u8n;
   LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
   LLVMValueRef i32_c8, i32_c128, i32_c255;
   LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
   LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
   LLVMValueRef r_ipart, r_fpart, r_fpart_lo, r_fpart_hi;
   LLVMValueRef x_stride, y_stride, z_stride;
   LLVMValueRef x_offset0, x_offset1;
   LLVMValueRef y_offset0, y_offset1;
   LLVMValueRef z_offset0, z_offset1;
   LLVMValueRef offset[2][2][2]; /* [z][y][x] */
   LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
   LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */
   LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */
   LLVMValueRef packed_lo, packed_hi;
   unsigned x, y, z;
   unsigned i, j, k;
   unsigned numj, numk;

   lp_build_context_init(&i32, builder, lp_type_int_vec(32));
   lp_build_context_init(&h16, builder, lp_type_ufixed(16));
   lp_build_context_init(&u8n, builder, lp_type_unorm(8));

   i32_vec_type = lp_build_vec_type(i32.type);
   h16_vec_type = lp_build_vec_type(h16.type);
   u8n_vec_type = lp_build_vec_type(u8n.type);

   if (bld->static_state->normalized_coords) {
      /* s = s * width, t = t * height */
      LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
      LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
                                              coord_vec_type, "");
      s = lp_build_mul(&bld->coord_bld, s, fp_width);
      if (dims >= 2) {
         LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
                                                  coord_vec_type, "");
         t = lp_build_mul(&bld->coord_bld, t, fp_height);
      }
      if (dims >= 3) {
         LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
                                                 coord_vec_type, "");
         r = lp_build_mul(&bld->coord_bld, r, fp_depth);
      }
   }

   /* scale coords by 256 (8 fractional bits) */
   s = lp_build_mul_imm(&bld->coord_bld, s, 256);
   if (dims >= 2)
      t = lp_build_mul_imm(&bld->coord_bld, t, 256);
   if (dims >= 3)
      r = lp_build_mul_imm(&bld->coord_bld, r, 256);

   /* convert float to int */
   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
   if (dims >= 2)
      t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
   if (dims >= 3)
      r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");

   /* subtract 0.5 (add -128) */
   i32_c128 = lp_build_const_int_vec(i32.type, -128);
   if (!bld->static_state->force_nearest_s) {
      s = LLVMBuildAdd(builder, s, i32_c128, "");
   }
   if (dims >= 2 && !bld->static_state->force_nearest_t) {
      t = LLVMBuildAdd(builder, t, i32_c128, "");
   }
   if (dims >= 3) {
      r = LLVMBuildAdd(builder, r, i32_c128, "");
   }
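   /* The -128 bias is -0.5 in 8.8 fixed point, moving from texel-corner to
    * texel-center space.  For example, s = 1.0 scales to 256 and biases to
    * 128, i.e. ipart 0 / fpart 128: texels 0 and 1 get equal lerp weights.
    */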

   /* compute floor (shift right 8) */
   i32_c8 = lp_build_const_int_vec(i32.type, 8);
   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   if (dims >= 2)
      t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
   if (dims >= 3)
      r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");

   /* compute fractional part (AND with 0xff) */
   i32_c255 = lp_build_const_int_vec(i32.type, 255);
   s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
   if (dims >= 2)
      t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
   if (dims >= 3)
      r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");

   /* get pixel, row and image strides */
   x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
                                 bld->format_desc->block.bits/8);
   y_stride = row_stride_vec;
   z_stride = img_stride_vec;

   /* do texcoord wrapping and compute texel offsets */
   lp_build_sample_wrap_linear_int(bld,
                                   bld->format_desc->block.width,
                                   s_ipart, width_vec, x_stride,
                                   bld->static_state->pot_width,
                                   bld->static_state->wrap_s,
                                   &x_offset0, &x_offset1,
                                   &x_subcoord[0], &x_subcoord[1]);
   for (z = 0; z < 2; z++) {
      for (y = 0; y < 2; y++) {
         offset[z][y][0] = x_offset0;
         offset[z][y][1] = x_offset1;
      }
   }

   if (dims >= 2) {
      lp_build_sample_wrap_linear_int(bld,
                                      bld->format_desc->block.height,
                                      t_ipart, height_vec, y_stride,
                                      bld->static_state->pot_height,
                                      bld->static_state->wrap_t,
                                      &y_offset0, &y_offset1,
                                      &y_subcoord[0], &y_subcoord[1]);

      for (z = 0; z < 2; z++) {
         for (x = 0; x < 2; x++) {
            offset[z][0][x] = lp_build_add(&bld->uint_coord_bld,
                                           offset[z][0][x], y_offset0);
            offset[z][1][x] = lp_build_add(&bld->uint_coord_bld,
                                           offset[z][1][x], y_offset1);
         }
      }
   }

   if (dims >= 3) {
      lp_build_sample_wrap_linear_int(bld,
                                      bld->format_desc->block.height,
                                      r_ipart, depth_vec, z_stride,
                                      bld->static_state->pot_depth,
                                      bld->static_state->wrap_r,
                                      &z_offset0, &z_offset1,
                                      &z_subcoord[0], &z_subcoord[1]);
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
                                           offset[0][y][x], z_offset0);
            offset[1][y][x] = lp_build_add(&bld->uint_coord_bld,
                                           offset[1][y][x], z_offset1);
         }
      }
   }
   else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
      LLVMValueRef z_offset;
      z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            /* The r coord is the cube face in [0,5] */
            offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
                                           offset[0][y][x], z_offset);
         }
      }
   }

   /*
    * Transform 4 x i32 in
    *
    *   s_fpart = {s0, s1, s2, s3}
    *
    * into 8 x i16 (ordering shown for big endian; the subindex logic below
    * handles little endian as well)
    *
    *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
    *
    * into two 8 x i16
    *
    *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
    *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
    *
    * and likewise for t_fpart. There is no risk of losing precision here
    * since the fractional parts only use the lower 8 bits.
    */
   s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
   if (dims >= 2)
      t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
   if (dims >= 3)
      r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, "");

   {
      LLVMTypeRef elem_type = LLVMInt32Type();
      LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
      LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
      LLVMValueRef shuffle_lo;
      LLVMValueRef shuffle_hi;

      for (j = 0; j < h16.type.length; j += 4) {
#ifdef PIPE_ARCH_LITTLE_ENDIAN
         unsigned subindex = 0;
#else
         unsigned subindex = 1;
#endif
         LLVMValueRef index;

         index = LLVMConstInt(elem_type, j/2 + subindex, 0);
         for (i = 0; i < 4; ++i)
            shuffles_lo[j + i] = index;

         index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
         for (i = 0; i < 4; ++i)
            shuffles_hi[j + i] = index;
      }

      shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
      shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
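      /* For instance, with h16.type.length == 8 on a little endian machine
       * this yields shuffle_lo = {0,0,0,0, 2,2,2,2} and
       * shuffle_hi = {4,4,4,4, 6,6,6,6}, which pick the low i16 half of
       * each original i32 lane (where the 8-bit fraction lives).
       */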

      s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
                                          shuffle_lo, "");
      s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
                                          shuffle_hi, "");
      if (dims >= 2) {
         t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
                                             shuffle_lo, "");
         t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
                                             shuffle_hi, "");
      }
      if (dims >= 3) {
         r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
                                             shuffle_lo, "");
         r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
                                             shuffle_hi, "");
      }
   }

   /*
    * Fetch the pixels as 4 x 32bit (rgba order might differ):
    *
    *   rgba0 rgba1 rgba2 rgba3
    *
    * bit cast them into 16 x u8
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    *
    * unpack them into two 8 x i16:
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1
    *   r2 g2 b2 a2 r3 g3 b3 a3
    *
    * The higher 8 bits of the resulting elements will be zero.
    */
   numj = 1 + (dims >= 2);
   numk = 1 + (dims >= 3);

   for (k = 0; k < numk; k++) {
      for (j = 0; j < numj; j++) {
         for (i = 0; i < 2; i++) {
            LLVMValueRef rgba8;

            if (util_format_is_rgba8_variant(bld->format_desc)) {
               /*
                * Given the format is a rgba8, just read the pixels as is,
                * without any swizzling. Swizzling will be done later.
                */
               rgba8 = lp_build_gather(bld->builder,
                                       bld->texel_type.length,
                                       bld->format_desc->block.bits,
                                       bld->texel_type.width,
                                       data_ptr, offset[k][j][i]);

               rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
            }
            else {
               rgba8 = lp_build_fetch_rgba_aos(bld->builder,
                                               bld->format_desc,
                                               u8n.type,
                                               data_ptr, offset[k][j][i],
                                               x_subcoord[i],
                                               y_subcoord[j]);
            }

            /* Expand one 4*rgba8 to two 2*rgba16 */
            lp_build_unpack2(builder, u8n.type, h16.type,
                             rgba8,
                             &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]);
         }
      }
   }

   /*
    * Linear interpolation with 8.8 fixed point.
    */
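   /* Conceptually, each lp_build_lerp() below computes, per 16-bit lane,
    * v = v0 + ((fpart * (v1 - v0)) >> 8), with fpart in [0,256) acting as
    * the [0,1) blend weight.
    */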
   if (bld->static_state->force_nearest_s) {
      /* special case 1-D lerp, along t only */
      packed_lo = lp_build_lerp(&h16,
                                t_fpart_lo,
                                neighbors_lo[0][0][0],
                                neighbors_lo[0][1][0]);

      packed_hi = lp_build_lerp(&h16,
                                t_fpart_hi,
                                neighbors_hi[0][0][0],
                                neighbors_hi[0][1][0]);
   }
   else if (bld->static_state->force_nearest_t) {
      /* special case 1-D lerp, along s only */
      packed_lo = lp_build_lerp(&h16,
                                s_fpart_lo,
                                neighbors_lo[0][0][0],
                                neighbors_lo[0][0][1]);

      packed_hi = lp_build_lerp(&h16,
                                s_fpart_hi,
                                neighbors_hi[0][0][0],
                                neighbors_hi[0][0][1]);
   }
   else {
      /* general 1/2/3-D lerping */
      if (dims == 1) {
         packed_lo = lp_build_lerp(&h16,
                                   s_fpart_lo,
                                   neighbors_lo[0][0][0],
                                   neighbors_lo[0][0][1]);

         packed_hi = lp_build_lerp(&h16,
                                   s_fpart_hi,
                                   neighbors_hi[0][0][0],
                                   neighbors_hi[0][0][1]);
      }
      else {
         /* 2-D lerp */
         packed_lo = lp_build_lerp_2d(&h16,
                                      s_fpart_lo, t_fpart_lo,
                                      neighbors_lo[0][0][0],
                                      neighbors_lo[0][0][1],
                                      neighbors_lo[0][1][0],
                                      neighbors_lo[0][1][1]);

         packed_hi = lp_build_lerp_2d(&h16,
                                      s_fpart_hi, t_fpart_hi,
                                      neighbors_hi[0][0][0],
                                      neighbors_hi[0][0][1],
                                      neighbors_hi[0][1][0],
                                      neighbors_hi[0][1][1]);

         if (dims >= 3) {
            LLVMValueRef packed_lo2, packed_hi2;

            /* lerp in the second z slice */
            packed_lo2 = lp_build_lerp_2d(&h16,
                                          s_fpart_lo, t_fpart_lo,
                                          neighbors_lo[1][0][0],
                                          neighbors_lo[1][0][1],
                                          neighbors_lo[1][1][0],
                                          neighbors_lo[1][1][1]);

            packed_hi2 = lp_build_lerp_2d(&h16,
                                          s_fpart_hi, t_fpart_hi,
                                          neighbors_hi[1][0][0],
                                          neighbors_hi[1][0][1],
                                          neighbors_hi[1][1][0],
                                          neighbors_hi[1][1][1]);
            /* interp between two z slices */
            packed_lo = lp_build_lerp(&h16, r_fpart_lo,
                                      packed_lo, packed_lo2);
            packed_hi = lp_build_lerp(&h16, r_fpart_hi,
                                      packed_hi, packed_hi2);
         }
      }
   }

   *colors_lo = packed_lo;
   *colors_hi = packed_hi;
}


/**
 * Sample the texture/mipmap using given image filter and mip filter.
 * data0_ptr and data1_ptr point to the two mipmap levels to sample
 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
 * If we're using nearest miplevel sampling the '1' values will be null/unused.
 */
static void
lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                       unsigned img_filter,
                       unsigned mip_filter,
                       LLVMValueRef s,
                       LLVMValueRef t,
                       LLVMValueRef r,
                       LLVMValueRef lod_fpart,
                       LLVMValueRef width0_vec,
                       LLVMValueRef width1_vec,
                       LLVMValueRef height0_vec,
                       LLVMValueRef height1_vec,
                       LLVMValueRef depth0_vec,
                       LLVMValueRef depth1_vec,
                       LLVMValueRef row_stride0_vec,
                       LLVMValueRef row_stride1_vec,
                       LLVMValueRef img_stride0_vec,
                       LLVMValueRef img_stride1_vec,
                       LLVMValueRef data_ptr0,
                       LLVMValueRef data_ptr1,
                       LLVMValueRef *colors_lo,
                       LLVMValueRef *colors_hi)
{
   LLVMValueRef colors0_lo, colors0_hi;
   LLVMValueRef colors1_lo, colors1_hi;

   if (img_filter == PIPE_TEX_FILTER_NEAREST) {
      /* sample the first mipmap level */
      lp_build_sample_image_nearest(bld,
                                    width0_vec, height0_vec, depth0_vec,
                                    row_stride0_vec, img_stride0_vec,
                                    data_ptr0, s, t, r,
                                    &colors0_lo, &colors0_hi);

      if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
         /* sample the second mipmap level */
         lp_build_sample_image_nearest(bld,
                                       width1_vec, height1_vec, depth1_vec,
                                       row_stride1_vec, img_stride1_vec,
                                       data_ptr1, s, t, r,
                                       &colors1_lo, &colors1_hi);
      }
   }
   else {
      assert(img_filter == PIPE_TEX_FILTER_LINEAR);

      /* sample the first mipmap level */
      lp_build_sample_image_linear(bld,
                                   width0_vec, height0_vec, depth0_vec,
                                   row_stride0_vec, img_stride0_vec,
                                   data_ptr0, s, t, r,
                                   &colors0_lo, &colors0_hi);

      if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
         /* sample the second mipmap level */
         lp_build_sample_image_linear(bld,
                                      width1_vec, height1_vec, depth1_vec,
                                      row_stride1_vec, img_stride1_vec,
                                      data_ptr1, s, t, r,
                                      &colors1_lo, &colors1_hi);
      }
   }

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      /* interpolate samples from the two mipmap levels */
      struct lp_build_context h16;
      lp_build_context_init(&h16, bld->builder, lp_type_ufixed(16));

      *colors_lo = lp_build_lerp(&h16, lod_fpart,
                                 colors0_lo, colors1_lo);
      *colors_hi = lp_build_lerp(&h16, lod_fpart,
                                 colors0_hi, colors1_hi);
   }
   else {
      /* use first/only level's colors */
      *colors_lo = colors0_lo;
      *colors_hi = colors0_hi;
   }
}



/**
 * Texture sampling in AoS format. Used when sampling common 32-bit/texel
 * formats. 1D/2D/3D/cube textures are supported, as are all mipmap sampling
 * modes, but only a limited set of texture coord wrap modes.
 */
void
lp_build_sample_aos(struct lp_build_sample_context *bld,
                    unsigned unit,
                    LLVMValueRef s,
                    LLVMValueRef t,
                    LLVMValueRef r,
                    const LLVMValueRef *ddx,
                    const LLVMValueRef *ddy,
                    LLVMValueRef lod_bias, /* optional */
                    LLVMValueRef explicit_lod, /* optional */
                    LLVMValueRef width,
                    LLVMValueRef height,
                    LLVMValueRef depth,
                    LLVMValueRef width_vec,
                    LLVMValueRef height_vec,
                    LLVMValueRef depth_vec,
                    LLVMValueRef row_stride_array,
                    LLVMValueRef img_stride_array,
                    LLVMValueRef data_array,
                    LLVMValueRef texel_out[4])
{
   struct lp_build_context *float_bld = &bld->float_bld;
   LLVMBuilderRef builder = bld->builder;
   const unsigned mip_filter = bld->static_state->min_mip_filter;
   const unsigned min_filter = bld->static_state->min_img_filter;
   const unsigned mag_filter = bld->static_state->mag_img_filter;
   const int dims = texture_dims(bld->static_state->target);
   LLVMValueRef lod = NULL, lod_fpart = NULL;
   LLVMValueRef ilevel0, ilevel1 = NULL;
   LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
   LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
   LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
   LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
   LLVMValueRef data_ptr0, data_ptr1 = NULL;
   LLVMValueRef packed, packed_lo, packed_hi;
   LLVMValueRef unswizzled[4];
   LLVMValueRef face_ddx[4], face_ddy[4];
   struct lp_build_context h16;
   LLVMTypeRef h16_vec_type;

   /* we only support the common/simple wrap modes at this time */
   assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
   if (dims >= 2)
      assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t));
   if (dims >= 3)
      assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r));


   /* make 16-bit fixed-pt builder context */
   lp_build_context_init(&h16, builder, lp_type_ufixed(16));
   h16_vec_type = lp_build_vec_type(h16.type);


   /* cube face selection, compute pre-face coords, etc. */
   if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
      LLVMValueRef face, face_s, face_t;
      lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
      s = face_s; /* vec */
      t = face_t; /* vec */
      /* use 'r' to indicate cube face */
      r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */

      /* recompute ddx, ddy using the new (s,t) face texcoords */
      face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
      face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
      face_ddx[2] = NULL;
      face_ddx[3] = NULL;
      face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
      face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
      face_ddy[2] = NULL;
      face_ddy[3] = NULL;
      ddx = face_ddx;
      ddy = face_ddy;
   }


   /*
    * Compute the level of detail (float).
    */
   if (min_filter != mag_filter ||
       mip_filter != PIPE_TEX_MIPFILTER_NONE) {
      /* Need to compute lod either to choose mipmap levels or to
       * distinguish between minification/magnification with one mipmap level.
       */
      lod = lp_build_lod_selector(bld, ddx, ddy,
                                  lod_bias, explicit_lod,
                                  width, height, depth);
   }

   /*
    * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
    * If mipfilter=linear, also compute the weight between the two
    * mipmap levels: lod_fpart
    */
   switch (mip_filter) {
   default:
      assert(0 && "bad mip_filter value in lp_build_sample_aos()");
      /* fall-through */
   case PIPE_TEX_MIPFILTER_NONE:
      /* always use mip level 0 */
      if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
         /* XXX this is a work-around for an apparent bug in LLVM 2.7.
          * We should be able to set ilevel0 = const(0) but that causes
          * bad x86 code to be emitted.
          */
         lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
         lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
      }
      else {
         ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
      }
      break;
   case PIPE_TEX_MIPFILTER_NEAREST:
      assert(lod);
      lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
      break;
   case PIPE_TEX_MIPFILTER_LINEAR:
      {
         LLVMValueRef f256 = LLVMConstReal(LLVMFloatType(), 256.0);
         LLVMValueRef i255 = lp_build_const_int32(255);
         LLVMTypeRef i16_type = LLVMIntType(16);

         assert(lod);

         lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
                                    &lod_fpart);
         lod_fpart = LLVMBuildFMul(builder, lod_fpart, f256, "");
         lod_fpart = lp_build_ifloor(&bld->float_bld, lod_fpart);
         lod_fpart = LLVMBuildAnd(builder, lod_fpart, i255, "");
         lod_fpart = LLVMBuildTrunc(builder, lod_fpart, i16_type, "");
         lod_fpart = lp_build_broadcast_scalar(&h16, lod_fpart);

         /* the lod_fpart values will be fixed pt values in [0,1) */
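         /* For example, lod = 2.3 would give ilevel0 = 2, ilevel1 = 3 and
          * lod_fpart = ifloor(0.3 * 256) = 76, i.e. a blend weight of
          * roughly 0.297 in 8.8 fixed point.
          */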
      }
      break;
   }

   /* compute image size(s) of source mipmap level(s) */
   lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec,
                               ilevel0, ilevel1,
                               row_stride_array, img_stride_array,
                               &width0_vec, &width1_vec,
                               &height0_vec, &height1_vec,
                               &depth0_vec, &depth1_vec,
                               &row_stride0_vec, &row_stride1_vec,
                               &img_stride0_vec, &img_stride1_vec);

   /*
    * Get pointer(s) to image data for mipmap level(s).
    */
   data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
   }


   /*
    * Get/interpolate texture colors.
    */
   if (min_filter == mag_filter) {
      /* no need to distinguish between minification and magnification */
      lp_build_sample_mipmap(bld, min_filter, mip_filter,
                             s, t, r, lod_fpart,
                             width0_vec, width1_vec,
                             height0_vec, height1_vec,
                             depth0_vec, depth1_vec,
                             row_stride0_vec, row_stride1_vec,
                             img_stride0_vec, img_stride1_vec,
                             data_ptr0, data_ptr1,
                             &packed_lo, &packed_hi);
   }
   else {
      /* Emit conditional to choose min image filter or mag image filter
       * depending on the lod being >= 0 or < 0, respectively.
       */
      struct lp_build_flow_context *flow_ctx;
      struct lp_build_if_state if_ctx;
      LLVMValueRef minify;

      flow_ctx = lp_build_flow_create(builder);
      lp_build_flow_scope_begin(flow_ctx);

      packed_lo = LLVMGetUndef(h16_vec_type);
      packed_hi = LLVMGetUndef(h16_vec_type);

      lp_build_flow_scope_declare(flow_ctx, &packed_lo);
      lp_build_flow_scope_declare(flow_ctx, &packed_hi);

      /* minify = lod >= 0.0 */
      minify = LLVMBuildFCmp(builder, LLVMRealUGE,
                             lod, float_bld->zero, "");

      lp_build_if(&if_ctx, flow_ctx, builder, minify);
      {
         /* Use the minification filter */
         lp_build_sample_mipmap(bld, min_filter, mip_filter,
                                s, t, r, lod_fpart,
                                width0_vec, width1_vec,
                                height0_vec, height1_vec,
                                depth0_vec, depth1_vec,
                                row_stride0_vec, row_stride1_vec,
                                img_stride0_vec, img_stride1_vec,
                                data_ptr0, data_ptr1,
                                &packed_lo, &packed_hi);
      }
      lp_build_else(&if_ctx);
      {
         /* Use the magnification filter */
         lp_build_sample_mipmap(bld, mag_filter, mip_filter,
                                s, t, r, lod_fpart,
                                width0_vec, width1_vec,
                                height0_vec, height1_vec,
                                depth0_vec, depth1_vec,
                                row_stride0_vec, row_stride1_vec,
                                img_stride0_vec, img_stride1_vec,
                                data_ptr0, data_ptr1,
                                &packed_lo, &packed_hi);
      }
      lp_build_endif(&if_ctx);

      lp_build_flow_scope_end(flow_ctx);
      lp_build_flow_destroy(flow_ctx);
   }

   /* combine 'packed_lo', 'packed_hi' into 'packed' */
   {
      struct lp_build_context h16, u8n;

      lp_build_context_init(&h16, builder, lp_type_ufixed(16));
      lp_build_context_init(&u8n, builder, lp_type_unorm(8));

      packed = lp_build_pack2(builder, h16.type, u8n.type,
                              packed_lo, packed_hi);
   }

   /*
    * Convert to SoA and swizzle.
    */
   lp_build_rgba8_to_f32_soa(builder,
                             bld->texel_type,
                             packed, unswizzled);

   if (util_format_is_rgba8_variant(bld->format_desc)) {
      lp_build_format_swizzle_soa(bld->format_desc,
                                  &bld->texel_bld,
                                  unswizzled, texel_out);
   }
   else {
      texel_out[0] = unswizzled[0];
      texel_out[1] = unswizzled[1];
      texel_out[2] = unswizzled[2];
      texel_out[3] = unswizzled[3];
   }

   apply_sampler_swizzle(bld, texel_out);
}