gallivm: support array textures
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Texture sampling -- AoS.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
34 */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "util/u_debug.h"
39 #include "util/u_dump.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/u_format.h"
43 #include "util/u_cpu_detect.h"
44 #include "lp_bld_debug.h"
45 #include "lp_bld_type.h"
46 #include "lp_bld_const.h"
47 #include "lp_bld_conv.h"
48 #include "lp_bld_arit.h"
49 #include "lp_bld_bitarit.h"
50 #include "lp_bld_logic.h"
51 #include "lp_bld_swizzle.h"
52 #include "lp_bld_pack.h"
53 #include "lp_bld_flow.h"
54 #include "lp_bld_gather.h"
55 #include "lp_bld_format.h"
56 #include "lp_bld_init.h"
57 #include "lp_bld_sample.h"
58 #include "lp_bld_sample_aos.h"
59 #include "lp_bld_quad.h"
60
61
62 /**
63 * Build LLVM code for texture coord wrapping, for nearest filtering,
64 * for scaled integer texcoords.
65 * \param block_length is the length of the pixel block along the
66 * coordinate axis
67 * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size
68 * \param length the texture size along one dimension
69 * \param stride pixel stride along the coordinate axis (in bytes)
70 * \param is_pot if TRUE, length is a power of two
71 * \param wrap_mode one of PIPE_TEX_WRAP_x
72 * \param out_offset byte offset for the wrapped coordinate
73 * \param out_i resulting sub-block pixel coordinate for coord0
74 */
75 static void
76 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
77 unsigned block_length,
78 LLVMValueRef coord,
79 LLVMValueRef coord_f,
80 LLVMValueRef length,
81 LLVMValueRef stride,
82 boolean is_pot,
83 unsigned wrap_mode,
84 LLVMValueRef *out_offset,
85 LLVMValueRef *out_i)
86 {
87 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
88 LLVMBuilderRef builder = bld->gallivm->builder;
89 LLVMValueRef length_minus_one;
90
91 length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
92
93 switch(wrap_mode) {
94 case PIPE_TEX_WRAP_REPEAT:
95 if(is_pot)
96 coord = LLVMBuildAnd(builder, coord, length_minus_one, "");
97 else {
98 struct lp_build_context *coord_bld = &bld->coord_bld;
99 LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
100 coord = lp_build_fract_safe(coord_bld, coord_f);
101 coord = lp_build_mul(coord_bld, coord, length_f);
102 coord = lp_build_itrunc(coord_bld, coord);
103 }
104 break;
105
106 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
107 coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
108 coord = lp_build_min(int_coord_bld, coord, length_minus_one);
109 break;
110
111 case PIPE_TEX_WRAP_CLAMP:
112 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
113 case PIPE_TEX_WRAP_MIRROR_REPEAT:
114 case PIPE_TEX_WRAP_MIRROR_CLAMP:
115 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
116 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
117 default:
118 assert(0);
119 }
120
121 lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
122 out_offset, out_i);
123 }
124
125
126 /**
127 * Build LLVM code for texture coord wrapping, for nearest filtering,
128 * for float texcoords.
129 * \param coord the incoming texcoord (s,t,r or q)
130 * \param length the texture size along one dimension
131 * \param is_pot if TRUE, length is a power of two
132 * \param wrap_mode one of PIPE_TEX_WRAP_x
133 * \param icoord the texcoord after wrapping, as int
134 */
135 static void
136 lp_build_sample_wrap_nearest_float(struct lp_build_sample_context *bld,
137 LLVMValueRef coord,
138 LLVMValueRef length,
139 boolean is_pot,
140 unsigned wrap_mode,
141 LLVMValueRef *icoord)
142 {
143 struct lp_build_context *coord_bld = &bld->coord_bld;
144 LLVMValueRef length_minus_one;
145
146 switch(wrap_mode) {
147 case PIPE_TEX_WRAP_REPEAT:
148 /* take fraction, unnormalize */
149 coord = lp_build_fract_safe(coord_bld, coord);
150 coord = lp_build_mul(coord_bld, coord, length);
151 *icoord = lp_build_itrunc(coord_bld, coord);
152 break;
153 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
154 length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);
155 if (bld->static_state->normalized_coords) {
156 /* scale coord to length */
157 coord = lp_build_mul(coord_bld, coord, length);
158 }
159 coord = lp_build_clamp(coord_bld, coord, coord_bld->zero,
160 length_minus_one);
161 *icoord = lp_build_itrunc(coord_bld, coord);
162 break;
163
164 case PIPE_TEX_WRAP_CLAMP:
165 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
166 case PIPE_TEX_WRAP_MIRROR_REPEAT:
167 case PIPE_TEX_WRAP_MIRROR_CLAMP:
168 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
169 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
170 default:
171 assert(0);
172 }
173 }
174
175
/**
 * Build LLVM code for texture coord wrapping, for linear filtering,
 * for scaled integer texcoords.
 * \param block_length is the length of the pixel block along the
 *                     coordinate axis
 * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size
 * \param weight_i resulting int lerp weight (0-255) for the NPOT repeat path
 * \param coord_f the incoming texcoord as float, used by the NPOT repeat path
 * \param length the texture size along one dimension
 * \param stride pixel stride along the coordinate axis (in bytes)
 * \param is_pot if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \param offset0 resulting relative offset for coord0
 * \param offset1 resulting relative offset for coord0 + 1
 * \param i0 resulting sub-block pixel coordinate for coord0
 * \param i1 resulting sub-block pixel coordinate for coord0 + 1
 */
static void
lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
                                unsigned block_length,
                                LLVMValueRef coord0,
                                LLVMValueRef *weight_i,
                                LLVMValueRef coord_f,
                                LLVMValueRef length,
                                LLVMValueRef stride,
                                boolean is_pot,
                                unsigned wrap_mode,
                                LLVMValueRef *offset0,
                                LLVMValueRef *offset1,
                                LLVMValueRef *i0,
                                LLVMValueRef *i1)
{
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one;
   LLVMValueRef lmask, umask, mask;

   /*
    * If the pixel block covers more than one pixel then there is no easy
    * way to calculate offset1 relative to offset0. Instead, compute them
    * independently. Otherwise, try to compute offset0 and offset1 with
    * a single stride multiplication.
    */

   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);

   if (block_length != 1) {
      /* multi-pixel block: wrap both coords, then compute each offset
       * separately via lp_build_sample_partial_offset() */
      LLVMValueRef coord1;
      switch(wrap_mode) {
      case PIPE_TEX_WRAP_REPEAT:
         if (is_pot) {
            /* POT repeat: coord & (length - 1) for both texels */
            coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
            coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
            coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
         }
         else {
            LLVMValueRef mask;
            LLVMValueRef weight;
            LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
            lp_build_coord_repeat_npot_linear(bld, coord_f,
                                              length, length_f,
                                              &coord0, &weight);
            /* mask is all-ones where coord0 != length-1; AND-ing with it
             * wraps coord0+1 back to 0 at the right edge */
            mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
                                    PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
            coord1 = LLVMBuildAnd(builder,
                                  lp_build_add(int_coord_bld, coord0,
                                               int_coord_bld->one),
                                  mask, "");
            /* convert weight to 8-bit fixed point (0-255) */
            weight = lp_build_mul_imm(&bld->coord_bld, weight, 256);
            *weight_i = lp_build_itrunc(&bld->coord_bld, weight);
         }
         break;

      case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
         /* clamp both texel coords to [0, length - 1] */
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         coord0 = lp_build_clamp(int_coord_bld, coord0, int_coord_bld->zero,
                                 length_minus_one);
         coord1 = lp_build_clamp(int_coord_bld, coord1, int_coord_bld->zero,
                                 length_minus_one);
         break;

      case PIPE_TEX_WRAP_CLAMP:
      case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      case PIPE_TEX_WRAP_MIRROR_REPEAT:
      case PIPE_TEX_WRAP_MIRROR_CLAMP:
      case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      default:
         /* remaining wrap modes are not handled by the AoS path */
         assert(0);
         coord0 = int_coord_bld->zero;
         coord1 = int_coord_bld->zero;
         break;
      }
      lp_build_sample_partial_offset(int_coord_bld, block_length, coord0, stride,
                                     offset0, i0);
      lp_build_sample_partial_offset(int_coord_bld, block_length, coord1, stride,
                                     offset1, i1);
      return;
   }

   /* single-pixel block: sub-block coords are always zero and offset1 can
    * be derived from offset0 with one stride add */
   *i0 = int_coord_bld->zero;
   *i1 = int_coord_bld->zero;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
      }
      else {
         LLVMValueRef weight;
         LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
         lp_build_coord_repeat_npot_linear(bld, coord_f,
                                           length, length_f,
                                           &coord0, &weight);
         /* convert weight to 8-bit fixed point (0-255) */
         weight = lp_build_mul_imm(&bld->coord_bld, weight, 256);
         *weight_i = lp_build_itrunc(&bld->coord_bld, weight);
      }

      /* mask is all-ones unless coord0 is at the last texel; AND-ing the
       * incremented offset with it wraps offset1 back to 0 there */
      mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
                              PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);

      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
      *offset1 = LLVMBuildAnd(builder,
                              lp_build_add(int_coord_bld, *offset0, stride),
                              mask, "");
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      /* XXX this might be slower than the separate path
       * on some newer cpus. With sse41 this is 8 instructions vs. 7
       * - at least on SNB this is almost certainly slower since
       * min/max are cheaper than selects, and the muls aren't bad.
       */
      lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                               PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
      umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                               PIPE_FUNC_LESS, coord0, length_minus_one);

      /* clamp coord0 to [0, length - 1] via selects */
      coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
      coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);

      /* mask is all-ones only when coord0 was strictly inside [0, length-1),
       * i.e. when the second texel is not clamped onto the first */
      mask = LLVMBuildAnd(builder, lmask, umask, "");

      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
      *offset1 = lp_build_add(int_coord_bld,
                              *offset0,
                              LLVMBuildAnd(builder, stride, mask, ""));
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      /* remaining wrap modes are not handled by the AoS path */
      assert(0);
      *offset0 = int_coord_bld->zero;
      *offset1 = int_coord_bld->zero;
      break;
   }
}
336
337
/**
 * Build LLVM code for texture coord wrapping, for linear filtering,
 * for float texcoords.
 * \param block_length is the length of the pixel block along the
 *                     coordinate axis
 * \param coord the incoming texcoord (s,t,r or q)
 * \param length the texture size along one dimension
 * \param is_pot if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \param coord0 the first texcoord after wrapping, as int
 * \param coord1 the second texcoord after wrapping, as int
 * \param weight the filter weight as int (0-255, 8-bit fixed point)
 * \param force_nearest if this coord actually uses nearest filtering
 */
static void
lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld,
                                  unsigned block_length,
                                  LLVMValueRef coord,
                                  LLVMValueRef length,
                                  boolean is_pot,
                                  unsigned wrap_mode,
                                  LLVMValueRef *coord0,
                                  LLVMValueRef *coord1,
                                  LLVMValueRef *weight,
                                  unsigned force_nearest)
{
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
   LLVMValueRef length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         /* mul by size and subtract 0.5 */
         coord = lp_build_mul(coord_bld, coord, length);
         if (!force_nearest)
            coord = lp_build_sub(coord_bld, coord, half);
         *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
         *coord1 = lp_build_ifloor(coord_bld, *coord1);
         /* repeat wrap */
         length_minus_one = lp_build_itrunc(coord_bld, length_minus_one);
         *coord0 = LLVMBuildAnd(builder, *coord0, length_minus_one, "");
         *coord1 = LLVMBuildAnd(builder, *coord1, length_minus_one, "");
      }
      else {
         LLVMValueRef mask;
         /* wrap with normalized floats is just fract */
         coord = lp_build_fract(coord_bld, coord);
         /* unnormalize */
         coord = lp_build_mul(coord_bld, coord, length);
         /*
          * we avoided the 0.5/length division, have to fix up wrong
          * edge cases with selects
          */
         *coord1 = lp_build_add(coord_bld, coord, half);
         coord = lp_build_sub(coord_bld, coord, half);
         *weight = lp_build_fract(coord_bld, coord);
         /* coord went negative after the 0.5 shift: wrap to last texel */
         mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
                                 PIPE_FUNC_LESS, coord, coord_bld->zero);
         *coord0 = lp_build_select(coord_bld, mask, length_minus_one, coord);
         *coord0 = lp_build_itrunc(coord_bld, *coord0);
         /* coord1 past the last texel: wrap to texel 0 */
         mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
                                 PIPE_FUNC_LESS, *coord1, length);
         *coord1 = lp_build_select(coord_bld, mask, *coord1, coord_bld->zero);
         *coord1 = lp_build_itrunc(coord_bld, *coord1);
      }
      break;
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      if (bld->static_state->normalized_coords) {
         /* mul by tex size */
         coord = lp_build_mul(coord_bld, coord, length);
      }
      /* subtract 0.5 */
      if (!force_nearest) {
         coord = lp_build_sub(coord_bld, coord, half);
      }
      /* clamp to [0, length - 1] */
      coord = lp_build_min(coord_bld, coord, length_minus_one);
      coord = lp_build_max(coord_bld, coord, coord_bld->zero);
      *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
      /* coord1 = min(coord1, length-1) */
      *coord1 = lp_build_min(coord_bld, *coord1, length_minus_one);
      *coord1 = lp_build_itrunc(coord_bld, *coord1);
      break;
   default:
      /* remaining wrap modes are not handled by the AoS path */
      assert(0);
      *coord0 = int_coord_bld->zero;
      *coord1 = int_coord_bld->zero;
      *weight = coord_bld->zero;
      break;
   }
   /* convert weight to 8-bit fixed point (0-255) */
   *weight = lp_build_mul_imm(coord_bld, *weight, 256);
   *weight = lp_build_itrunc(coord_bld, *weight);
   return;
}
439
440
441 /**
442 * Fetch texels for image with nearest sampling.
443 * Return filtered color as two vectors of 16-bit fixed point values.
444 */
445 static void
446 lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
447 LLVMValueRef data_ptr,
448 LLVMValueRef offset,
449 LLVMValueRef x_subcoord,
450 LLVMValueRef y_subcoord,
451 LLVMValueRef *colors_lo,
452 LLVMValueRef *colors_hi)
453 {
454 /*
455 * Fetch the pixels as 4 x 32bit (rgba order might differ):
456 *
457 * rgba0 rgba1 rgba2 rgba3
458 *
459 * bit cast them into 16 x u8
460 *
461 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
462 *
463 * unpack them into two 8 x i16:
464 *
465 * r0 g0 b0 a0 r1 g1 b1 a1
466 * r2 g2 b2 a2 r3 g3 b3 a3
467 *
468 * The higher 8 bits of the resulting elements will be zero.
469 */
470 LLVMBuilderRef builder = bld->gallivm->builder;
471 LLVMValueRef rgba8;
472 struct lp_build_context h16, u8n;
473 LLVMTypeRef u8n_vec_type;
474
475 lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16, bld->vector_width));
476 lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
477 u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
478
479 if (util_format_is_rgba8_variant(bld->format_desc)) {
480 /*
481 * Given the format is a rgba8, just read the pixels as is,
482 * without any swizzling. Swizzling will be done later.
483 */
484 rgba8 = lp_build_gather(bld->gallivm,
485 bld->texel_type.length,
486 bld->format_desc->block.bits,
487 bld->texel_type.width,
488 data_ptr, offset);
489
490 rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
491 }
492 else {
493 rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
494 bld->format_desc,
495 u8n.type,
496 data_ptr, offset,
497 x_subcoord,
498 y_subcoord);
499 }
500
501 /* Expand one 4*rgba8 to two 2*rgba16 */
502 lp_build_unpack2(bld->gallivm, u8n.type, h16.type,
503 rgba8,
504 colors_lo, colors_hi);
505 }
506
507
/**
 * Sample a single texture image with nearest sampling.
 * If sampling a cube texture, r = cube face in [0,5].
 * Return filtered color as two vectors of 16-bit fixed point values.
 *
 * Coordinates are handled in 24.8 fixed point: coords are scaled by 256,
 * converted to int and shifted right by 8 to get the integer texel index.
 */
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                              LLVMValueRef int_size,
                              LLVMValueRef row_stride_vec,
                              LLVMValueRef img_stride_vec,
                              LLVMValueRef data_ptr,
                              LLVMValueRef mipoffsets,
                              LLVMValueRef s,
                              LLVMValueRef t,
                              LLVMValueRef r,
                              LLVMValueRef *colors_lo,
                              LLVMValueRef *colors_hi)
{
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context i32;
   LLVMTypeRef i32_vec_type;
   LLVMValueRef i32_c8;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL;
   LLVMValueRef s_float, t_float = NULL, r_float = NULL;
   LLVMValueRef x_stride;
   LLVMValueRef x_offset, offset;
   LLVMValueRef x_subcoord, y_subcoord, z_subcoord;

   lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));

   i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);

   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                int_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* keep the original float coords for the NPOT repeat wrap path */
   s_float = s; t_float = t; r_float = r;

   if (bld->static_state->normalized_coords) {
      LLVMValueRef scaled_size;
      LLVMValueRef flt_size;

      /* scale size by 256 (8 fractional bits) */
      scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);

      flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);

      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
   }
   else {
      /* scale coords by 256 (8 fractional bits) */
      s = lp_build_mul_imm(&bld->coord_bld, s, 256);
      if (dims >= 2)
         t = lp_build_mul_imm(&bld->coord_bld, t, 256);
      if (dims >= 3)
         r = lp_build_mul_imm(&bld->coord_bld, r, 256);
   }

   /* convert float to int */
   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
   if (dims >= 2)
      t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
   if (dims >= 3)
      r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");

   /* compute floor (shift right 8) */
   i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   if (dims >= 2)
      t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
   if (dims >= 3)
      r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");

   /* get pixel, row, image strides */
   x_stride = lp_build_const_vec(bld->gallivm,
                                 bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);

   /* Do texcoord wrapping, compute texel offset */
   lp_build_sample_wrap_nearest_int(bld,
                                    bld->format_desc->block.width,
                                    s_ipart, s_float,
                                    width_vec, x_stride,
                                    bld->static_state->pot_width,
                                    bld->static_state->wrap_s,
                                    &x_offset, &x_subcoord);
   offset = x_offset;
   if (dims >= 2) {
      LLVMValueRef y_offset;
      lp_build_sample_wrap_nearest_int(bld,
                                       bld->format_desc->block.height,
                                       t_ipart, t_float,
                                       height_vec, row_stride_vec,
                                       bld->static_state->pot_height,
                                       bld->static_state->wrap_t,
                                       &y_offset, &y_subcoord);
      offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
      if (dims >= 3) {
         LLVMValueRef z_offset;
         lp_build_sample_wrap_nearest_int(bld,
                                          1, /* block length (depth) */
                                          r_ipart, r_float,
                                          depth_vec, img_stride_vec,
                                          bld->static_state->pot_depth,
                                          bld->static_state->wrap_r,
                                          &z_offset, &z_subcoord);
         offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
      }
   }
   if (bld->static_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_state->target == PIPE_TEXTURE_2D_ARRAY) {
      LLVMValueRef z_offset;
      /* The r coord is the cube face in [0,5] or array layer */
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
   }
   if (mipoffsets) {
      /* add per-quad mipmap level offsets */
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
   }

   lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
                                       x_subcoord, y_subcoord,
                                       colors_lo, colors_hi);
}
639
640
/**
 * Sample a single texture image with nearest sampling.
 * If sampling a cube texture, r = cube face in [0,5].
 * Return filtered color as two vectors of 16-bit fixed point values.
 * Does address calcs (except offsets) with floats.
 * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
 */
static void
lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld,
                                     LLVMValueRef int_size,
                                     LLVMValueRef row_stride_vec,
                                     LLVMValueRef img_stride_vec,
                                     LLVMValueRef data_ptr,
                                     LLVMValueRef mipoffsets,
                                     LLVMValueRef s,
                                     LLVMValueRef t,
                                     LLVMValueRef r,
                                     LLVMValueRef *colors_lo,
                                     LLVMValueRef *colors_hi)
{
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef offset;
   LLVMValueRef x_subcoord, y_subcoord;
   LLVMValueRef x_icoord = NULL, y_icoord = NULL, z_icoord = NULL;
   LLVMValueRef flt_size;

   /* image sizes as floats for the float wrap code below */
   flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* Do texcoord wrapping */
   lp_build_sample_wrap_nearest_float(bld,
                                      s, width_vec,
                                      bld->static_state->pot_width,
                                      bld->static_state->wrap_s,
                                      &x_icoord);

   if (dims >= 2) {
      lp_build_sample_wrap_nearest_float(bld,
                                         t, height_vec,
                                         bld->static_state->pot_height,
                                         bld->static_state->wrap_t,
                                         &y_icoord);

      if (dims >= 3) {
         lp_build_sample_wrap_nearest_float(bld,
                                            r, depth_vec,
                                            bld->static_state->pot_depth,
                                            bld->static_state->wrap_r,
                                            &z_icoord);
      }
   }
   if (bld->static_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_state->target == PIPE_TEXTURE_2D_ARRAY) {
      /* r is the cube face / array layer index, used unwrapped */
      z_icoord = r;
   }

   /*
    * From here on we deal with ints, and we should split up the 256bit
    * vectors manually for better generated code.
    */

   /*
    * compute texel offsets -
    * cannot do offset calc with floats, difficult for block-based formats,
    * and not enough precision anyway.
    */
   lp_build_sample_offset(&bld->int_coord_bld,
                          bld->format_desc,
                          x_icoord, y_icoord,
                          z_icoord,
                          row_stride_vec, img_stride_vec,
                          &offset,
                          &x_subcoord, &y_subcoord);
   if (mipoffsets) {
      /* add per-quad mipmap level offsets */
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
   }

   lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
                                       x_subcoord, y_subcoord,
                                       colors_lo, colors_hi);
}
731
732
733 /**
734 * Fetch texels for image with linear sampling.
735 * Return filtered color as two vectors of 16-bit fixed point values.
736 */
737 static void
738 lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
739 LLVMValueRef data_ptr,
740 LLVMValueRef offset[2][2][2],
741 LLVMValueRef x_subcoord[2],
742 LLVMValueRef y_subcoord[2],
743 LLVMValueRef s_fpart,
744 LLVMValueRef t_fpart,
745 LLVMValueRef r_fpart,
746 LLVMValueRef *colors_lo,
747 LLVMValueRef *colors_hi)
748 {
749 const unsigned dims = bld->dims;
750 LLVMBuilderRef builder = bld->gallivm->builder;
751 struct lp_build_context h16, u8n;
752 LLVMTypeRef h16_vec_type, u8n_vec_type;
753 LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
754 LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
755 LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
756 LLVMValueRef shuffle_lo, shuffle_hi;
757 LLVMValueRef s_fpart_lo, s_fpart_hi;
758 LLVMValueRef t_fpart_lo = NULL, t_fpart_hi = NULL;
759 LLVMValueRef r_fpart_lo = NULL, r_fpart_hi = NULL;
760 LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */
761 LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */
762 LLVMValueRef packed_lo, packed_hi;
763 unsigned i, j, k;
764 unsigned numj, numk;
765
766 lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16, bld->vector_width));
767 lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
768 h16_vec_type = lp_build_vec_type(bld->gallivm, h16.type);
769 u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
770
771 /*
772 * Transform 4 x i32 in
773 *
774 * s_fpart = {s0, s1, s2, s3}
775 *
776 * into 8 x i16
777 *
778 * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
779 *
780 * into two 8 x i16
781 *
782 * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
783 * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
784 *
785 * and likewise for t_fpart. There is no risk of loosing precision here
786 * since the fractional parts only use the lower 8bits.
787 */
788 s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
789 if (dims >= 2)
790 t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
791 if (dims >= 3)
792 r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, "");
793
794 for (j = 0; j < h16.type.length; j += 4) {
795 #ifdef PIPE_ARCH_LITTLE_ENDIAN
796 unsigned subindex = 0;
797 #else
798 unsigned subindex = 1;
799 #endif
800 LLVMValueRef index;
801
802 index = LLVMConstInt(elem_type, j/2 + subindex, 0);
803 for (i = 0; i < 4; ++i)
804 shuffles_lo[j + i] = index;
805
806 index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
807 for (i = 0; i < 4; ++i)
808 shuffles_hi[j + i] = index;
809 }
810
811 shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
812 shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
813
814 s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
815 shuffle_lo, "");
816 s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
817 shuffle_hi, "");
818 if (dims >= 2) {
819 t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
820 shuffle_lo, "");
821 t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
822 shuffle_hi, "");
823 }
824 if (dims >= 3) {
825 r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
826 shuffle_lo, "");
827 r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
828 shuffle_hi, "");
829 }
830
831 /*
832 * Fetch the pixels as 4 x 32bit (rgba order might differ):
833 *
834 * rgba0 rgba1 rgba2 rgba3
835 *
836 * bit cast them into 16 x u8
837 *
838 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
839 *
840 * unpack them into two 8 x i16:
841 *
842 * r0 g0 b0 a0 r1 g1 b1 a1
843 * r2 g2 b2 a2 r3 g3 b3 a3
844 *
845 * The higher 8 bits of the resulting elements will be zero.
846 */
847 numj = 1 + (dims >= 2);
848 numk = 1 + (dims >= 3);
849
850 for (k = 0; k < numk; k++) {
851 for (j = 0; j < numj; j++) {
852 for (i = 0; i < 2; i++) {
853 LLVMValueRef rgba8;
854
855 if (util_format_is_rgba8_variant(bld->format_desc)) {
856 /*
857 * Given the format is a rgba8, just read the pixels as is,
858 * without any swizzling. Swizzling will be done later.
859 */
860 rgba8 = lp_build_gather(bld->gallivm,
861 bld->texel_type.length,
862 bld->format_desc->block.bits,
863 bld->texel_type.width,
864 data_ptr, offset[k][j][i]);
865
866 rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
867 }
868 else {
869 rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
870 bld->format_desc,
871 u8n.type,
872 data_ptr, offset[k][j][i],
873 x_subcoord[i],
874 y_subcoord[j]);
875 }
876
877 /* Expand one 4*rgba8 to two 2*rgba16 */
878 lp_build_unpack2(bld->gallivm, u8n.type, h16.type,
879 rgba8,
880 &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]);
881 }
882 }
883 }
884
885 /*
886 * Linear interpolation with 8.8 fixed point.
887 */
888 if (bld->static_state->force_nearest_s) {
889 /* special case 1-D lerp */
890 packed_lo = lp_build_lerp(&h16,
891 t_fpart_lo,
892 neighbors_lo[0][0][0],
893 neighbors_lo[0][0][1]);
894
895 packed_hi = lp_build_lerp(&h16,
896 t_fpart_hi,
897 neighbors_hi[0][1][0],
898 neighbors_hi[0][1][0]);
899 }
900 else if (bld->static_state->force_nearest_t) {
901 /* special case 1-D lerp */
902 packed_lo = lp_build_lerp(&h16,
903 s_fpart_lo,
904 neighbors_lo[0][0][0],
905 neighbors_lo[0][0][1]);
906
907 packed_hi = lp_build_lerp(&h16,
908 s_fpart_hi,
909 neighbors_hi[0][0][0],
910 neighbors_hi[0][0][1]);
911 }
912 else {
913 /* general 1/2/3-D lerping */
914 if (dims == 1) {
915 packed_lo = lp_build_lerp(&h16,
916 s_fpart_lo,
917 neighbors_lo[0][0][0],
918 neighbors_lo[0][0][1]);
919
920 packed_hi = lp_build_lerp(&h16,
921 s_fpart_hi,
922 neighbors_hi[0][0][0],
923 neighbors_hi[0][0][1]);
924 }
925 else {
926 /* 2-D lerp */
927 packed_lo = lp_build_lerp_2d(&h16,
928 s_fpart_lo, t_fpart_lo,
929 neighbors_lo[0][0][0],
930 neighbors_lo[0][0][1],
931 neighbors_lo[0][1][0],
932 neighbors_lo[0][1][1]);
933
934 packed_hi = lp_build_lerp_2d(&h16,
935 s_fpart_hi, t_fpart_hi,
936 neighbors_hi[0][0][0],
937 neighbors_hi[0][0][1],
938 neighbors_hi[0][1][0],
939 neighbors_hi[0][1][1]);
940
941 if (dims >= 3) {
942 LLVMValueRef packed_lo2, packed_hi2;
943
944 /* lerp in the second z slice */
945 packed_lo2 = lp_build_lerp_2d(&h16,
946 s_fpart_lo, t_fpart_lo,
947 neighbors_lo[1][0][0],
948 neighbors_lo[1][0][1],
949 neighbors_lo[1][1][0],
950 neighbors_lo[1][1][1]);
951
952 packed_hi2 = lp_build_lerp_2d(&h16,
953 s_fpart_hi, t_fpart_hi,
954 neighbors_hi[1][0][0],
955 neighbors_hi[1][0][1],
956 neighbors_hi[1][1][0],
957 neighbors_hi[1][1][1]);
958 /* interp between two z slices */
959 packed_lo = lp_build_lerp(&h16, r_fpart_lo,
960 packed_lo, packed_lo2);
961 packed_hi = lp_build_lerp(&h16, r_fpart_hi,
962 packed_hi, packed_hi2);
963 }
964 }
965 }
966
967 *colors_lo = packed_lo;
968 *colors_hi = packed_hi;
969 }
970
/**
 * Sample a single texture image with (bi-)(tri-)linear sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 *
 * Coordinates are converted to 8.8 fixed point (scaled by 256); the
 * arithmetic-shift-right by 8 yields the integer texel coordinate
 * (floor) and the masked low 8 bits become the lerp weight passed on
 * to the fetch/filter helper.
 */
static void
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                             LLVMValueRef int_size,
                             LLVMValueRef row_stride_vec,
                             LLVMValueRef img_stride_vec,
                             LLVMValueRef data_ptr,
                             LLVMValueRef mipoffsets,
                             LLVMValueRef s,
                             LLVMValueRef t,
                             LLVMValueRef r,
                             LLVMValueRef *colors_lo,
                             LLVMValueRef *colors_hi)
{
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context i32;
   LLVMTypeRef i32_vec_type;
   LLVMValueRef i32_c8, i32_c128, i32_c255;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_ipart, s_fpart, s_float;
   LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_float = NULL;
   LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_float = NULL;
   LLVMValueRef x_stride, y_stride, z_stride;
   LLVMValueRef x_offset0, x_offset1;
   LLVMValueRef y_offset0, y_offset1;
   LLVMValueRef z_offset0, z_offset1;
   LLVMValueRef offset[2][2][2]; /* [z][y][x] */
   LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
   unsigned x, y, z;

   lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));

   i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);

   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                int_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* keep the original float coords around; the wrap code needs them */
   s_float = s; t_float = t; r_float = r;

   if (bld->static_state->normalized_coords) {
      LLVMValueRef scaled_size;
      LLVMValueRef flt_size;

      /* scale size by 256 (8 fractional bits) */
      scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);

      flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);

      /* s,t,r become unnormalized coords pre-scaled by 256 */
      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
   }
   else {
      /* scale coords by 256 (8 fractional bits) */
      s = lp_build_mul_imm(&bld->coord_bld, s, 256);
      if (dims >= 2)
         t = lp_build_mul_imm(&bld->coord_bld, t, 256);
      if (dims >= 3)
         r = lp_build_mul_imm(&bld->coord_bld, r, 256);
   }

   /* convert float to int */
   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
   if (dims >= 2)
      t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
   if (dims >= 3)
      r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");

   /* subtract 0.5 (add -128) to center the filter footprint,
    * skipped per-axis when nearest filtering is forced for that axis */
   i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
   if (!bld->static_state->force_nearest_s) {
      s = LLVMBuildAdd(builder, s, i32_c128, "");
   }
   if (dims >= 2 && !bld->static_state->force_nearest_t) {
      t = LLVMBuildAdd(builder, t, i32_c128, "");
   }
   if (dims >= 3) {
      r = LLVMBuildAdd(builder, r, i32_c128, "");
   }

   /* compute floor (shift right 8) */
   i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   if (dims >= 2)
      t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
   if (dims >= 3)
      r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");

   /* compute fractional part (AND with 0xff) */
   i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
   s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
   if (dims >= 2)
      t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
   if (dims >= 3)
      r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");

   /* get pixel, row and image strides */
   x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);
   y_stride = row_stride_vec;
   z_stride = img_stride_vec;

   /* do texcoord wrapping and compute texel offsets */
   lp_build_sample_wrap_linear_int(bld,
                                   bld->format_desc->block.width,
                                   s_ipart, &s_fpart, s_float,
                                   width_vec, x_stride,
                                   bld->static_state->pot_width,
                                   bld->static_state->wrap_s,
                                   &x_offset0, &x_offset1,
                                   &x_subcoord[0], &x_subcoord[1]);

   /* add potential cube/array/mip offsets now as they are constant per pixel */
   if (bld->static_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_state->target == PIPE_TEXTURE_2D_ARRAY) {
      LLVMValueRef z_offset;
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      /* The r coord is the cube face in [0,5] or array layer */
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
   }
   if (mipoffsets) {
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
   }

   /* seed all eight neighbor offsets with the two x offsets;
    * y/z contributions are accumulated below */
   for (z = 0; z < 2; z++) {
      for (y = 0; y < 2; y++) {
         offset[z][y][0] = x_offset0;
         offset[z][y][1] = x_offset1;
      }
   }

   if (dims >= 2) {
      lp_build_sample_wrap_linear_int(bld,
                                      bld->format_desc->block.height,
                                      t_ipart, &t_fpart, t_float,
                                      height_vec, y_stride,
                                      bld->static_state->pot_height,
                                      bld->static_state->wrap_t,
                                      &y_offset0, &y_offset1,
                                      &y_subcoord[0], &y_subcoord[1]);

      for (z = 0; z < 2; z++) {
         for (x = 0; x < 2; x++) {
            offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][0][x], y_offset0);
            offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][1][x], y_offset1);
         }
      }
   }

   if (dims >= 3) {
      /* NOTE(review): passes block.height for the depth axis; block depth
       * is presumably 1 for all supported formats here — confirm this is
       * intentional rather than a copy of the dims>=2 call. */
      lp_build_sample_wrap_linear_int(bld,
                                      bld->format_desc->block.height,
                                      r_ipart, &r_fpart, r_float,
                                      depth_vec, z_stride,
                                      bld->static_state->pot_depth,
                                      bld->static_state->wrap_r,
                                      &z_offset0, &z_offset1,
                                      &z_subcoord[0], &z_subcoord[1]);
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[0][y][x], z_offset0);
            offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[1][y][x], z_offset1);
         }
      }
   }

   /* fetch the 2/4/8 neighboring texels and do the actual filtering */
   lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
                                      x_subcoord, y_subcoord,
                                      s_fpart, t_fpart, r_fpart,
                                      colors_lo, colors_hi);
}
1156
1157
/**
 * Sample a single texture image with (bi-)(tri-)linear sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 * Does address calcs (except offsets) with floats.
 * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
 *
 * Unlike lp_build_sample_image_linear(), the wrap helper here both wraps
 * and produces the two integer texel coordinates plus the fractional
 * lerp weight directly from float math; only the final byte-offset
 * computation is done with integer vectors.
 */
static void
lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld,
                                    LLVMValueRef int_size,
                                    LLVMValueRef row_stride_vec,
                                    LLVMValueRef img_stride_vec,
                                    LLVMValueRef data_ptr,
                                    LLVMValueRef mipoffsets,
                                    LLVMValueRef s,
                                    LLVMValueRef t,
                                    LLVMValueRef r,
                                    LLVMValueRef *colors_lo,
                                    LLVMValueRef *colors_hi)
{
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_fpart;
   LLVMValueRef t_fpart = NULL;
   LLVMValueRef r_fpart = NULL;
   LLVMValueRef x_stride, y_stride, z_stride;
   LLVMValueRef x_offset0, x_offset1;
   LLVMValueRef y_offset0, y_offset1;
   LLVMValueRef z_offset0, z_offset1;
   LLVMValueRef offset[2][2][2]; /* [z][y][x] */
   LLVMValueRef x_subcoord[2], y_subcoord[2];
   LLVMValueRef flt_size;
   LLVMValueRef x_icoord0, x_icoord1;
   LLVMValueRef y_icoord0, y_icoord1;
   LLVMValueRef z_icoord0, z_icoord1;
   unsigned x, y, z;

   /* image sizes are needed as floats for the float wrap math */
   flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* do texcoord wrapping and compute texel offsets */
   lp_build_sample_wrap_linear_float(bld,
                                     bld->format_desc->block.width,
                                     s, width_vec,
                                     bld->static_state->pot_width,
                                     bld->static_state->wrap_s,
                                     &x_icoord0, &x_icoord1,
                                     &s_fpart,
                                     bld->static_state->force_nearest_s);

   if (dims >= 2) {
      lp_build_sample_wrap_linear_float(bld,
                                        bld->format_desc->block.height,
                                        t, height_vec,
                                        bld->static_state->pot_height,
                                        bld->static_state->wrap_t,
                                        &y_icoord0, &y_icoord1,
                                        &t_fpart,
                                        bld->static_state->force_nearest_t);

      if (dims >= 3) {
         /* NOTE(review): passes block.height for the depth axis (block
          * depth is presumably 1) — confirm this is intentional.
          * force_nearest is never applied to r, hence the trailing 0. */
         lp_build_sample_wrap_linear_float(bld,
                                           bld->format_desc->block.height,
                                           r, depth_vec,
                                           bld->static_state->pot_depth,
                                           bld->static_state->wrap_r,
                                           &z_icoord0, &z_icoord1,
                                           &r_fpart, 0);
      }
   }

   /*
    * From here on we deal with ints, and we should split up the 256bit
    * vectors manually for better generated code.
    */

   /* get pixel, row and image strides */
   x_stride = lp_build_const_vec(bld->gallivm,
                                 bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);
   y_stride = row_stride_vec;
   z_stride = img_stride_vec;

   /*
    * compute texel offset -
    * cannot do offset calc with floats, difficult for block-based formats,
    * and not enough precision anyway.
    */
   lp_build_sample_partial_offset(&bld->int_coord_bld,
                                  bld->format_desc->block.width,
                                  x_icoord0, x_stride,
                                  &x_offset0, &x_subcoord[0]);
   lp_build_sample_partial_offset(&bld->int_coord_bld,
                                  bld->format_desc->block.width,
                                  x_icoord1, x_stride,
                                  &x_offset1, &x_subcoord[1]);

   /* add potential cube/array/mip offsets now as they are constant per pixel */
   if (bld->static_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_state->target == PIPE_TEXTURE_2D_ARRAY) {
      LLVMValueRef z_offset;
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      /* The r coord is the cube face in [0,5] or array layer */
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
   }
   if (mipoffsets) {
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
   }

   /* seed all eight neighbor offsets with the two x offsets;
    * y/z contributions are accumulated below */
   for (z = 0; z < 2; z++) {
      for (y = 0; y < 2; y++) {
         offset[z][y][0] = x_offset0;
         offset[z][y][1] = x_offset1;
      }
   }

   if (dims >= 2) {
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     bld->format_desc->block.height,
                                     y_icoord0, y_stride,
                                     &y_offset0, &y_subcoord[0]);
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     bld->format_desc->block.height,
                                     y_icoord1, y_stride,
                                     &y_offset1, &y_subcoord[1]);
      for (z = 0; z < 2; z++) {
         for (x = 0; x < 2; x++) {
            offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][0][x], y_offset0);
            offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][1][x], y_offset1);
         }
      }
   }

   if (dims >= 3) {
      /* block depth of 1: z_subcoord is always zero and only needed
       * to satisfy the helper's out-parameter */
      LLVMValueRef z_subcoord[2];
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     1,
                                     z_icoord0, z_stride,
                                     &z_offset0, &z_subcoord[0]);
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     1,
                                     z_icoord1, z_stride,
                                     &z_offset1, &z_subcoord[1]);
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[0][y][x], z_offset0);
            offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[1][y][x], z_offset1);
         }
      }
   }

   /* fetch the 2/4/8 neighboring texels and do the actual filtering */
   lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
                                      x_subcoord, y_subcoord,
                                      s_fpart, t_fpart, r_fpart,
                                      colors_lo, colors_hi);
}
1327
1328
1329 /**
1330 * Sample the texture/mipmap using given image filter and mip filter.
1331 * data0_ptr and data1_ptr point to the two mipmap levels to sample
1332 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
1333 * If we're using nearest miplevel sampling the '1' values will be null/unused.
1334 */
1335 static void
1336 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1337 unsigned img_filter,
1338 unsigned mip_filter,
1339 LLVMValueRef s,
1340 LLVMValueRef t,
1341 LLVMValueRef r,
1342 LLVMValueRef ilevel0,
1343 LLVMValueRef ilevel1,
1344 LLVMValueRef lod_fpart,
1345 LLVMValueRef colors_lo_var,
1346 LLVMValueRef colors_hi_var)
1347 {
1348 LLVMBuilderRef builder = bld->gallivm->builder;
1349 LLVMValueRef size0;
1350 LLVMValueRef size1;
1351 LLVMValueRef row_stride0_vec = NULL;
1352 LLVMValueRef row_stride1_vec = NULL;
1353 LLVMValueRef img_stride0_vec = NULL;
1354 LLVMValueRef img_stride1_vec = NULL;
1355 LLVMValueRef data_ptr0;
1356 LLVMValueRef data_ptr1;
1357 LLVMValueRef mipoff0 = NULL;
1358 LLVMValueRef mipoff1 = NULL;
1359 LLVMValueRef colors0_lo, colors0_hi;
1360 LLVMValueRef colors1_lo, colors1_hi;
1361
1362 /* sample the first mipmap level */
1363 lp_build_mipmap_level_sizes(bld, ilevel0,
1364 &size0,
1365 &row_stride0_vec, &img_stride0_vec);
1366 if (bld->num_lods == 1) {
1367 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1368 }
1369 else {
1370 /* This path should work for num_lods 1 too but slightly less efficient */
1371 data_ptr0 = bld->base_ptr;
1372 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1373 }
1374
1375 if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
1376 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1377 lp_build_sample_image_nearest_afloat(bld,
1378 size0,
1379 row_stride0_vec, img_stride0_vec,
1380 data_ptr0, mipoff0, s, t, r,
1381 &colors0_lo, &colors0_hi);
1382 }
1383 else {
1384 assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1385 lp_build_sample_image_linear_afloat(bld,
1386 size0,
1387 row_stride0_vec, img_stride0_vec,
1388 data_ptr0, mipoff0, s, t, r,
1389 &colors0_lo, &colors0_hi);
1390 }
1391 }
1392 else {
1393 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1394 lp_build_sample_image_nearest(bld,
1395 size0,
1396 row_stride0_vec, img_stride0_vec,
1397 data_ptr0, mipoff0, s, t, r,
1398 &colors0_lo, &colors0_hi);
1399 }
1400 else {
1401 assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1402 lp_build_sample_image_linear(bld,
1403 size0,
1404 row_stride0_vec, img_stride0_vec,
1405 data_ptr0, mipoff0, s, t, r,
1406 &colors0_lo, &colors0_hi);
1407 }
1408 }
1409
1410 /* Store the first level's colors in the output variables */
1411 LLVMBuildStore(builder, colors0_lo, colors_lo_var);
1412 LLVMBuildStore(builder, colors0_hi, colors_hi_var);
1413
1414 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1415 LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
1416 bld->perquadf_bld.type, 256.0);
1417 LLVMTypeRef i32vec_type = lp_build_vec_type(bld->gallivm, bld->perquadi_bld.type);
1418 struct lp_build_if_state if_ctx;
1419 LLVMValueRef need_lerp;
1420 unsigned num_quads = bld->coord_bld.type.length / 4;
1421 unsigned i;
1422
1423 lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16vec_scale, "");
1424 lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16");
1425
1426 /* need_lerp = lod_fpart > 0 */
1427 if (num_quads == 1) {
1428 need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
1429 lod_fpart, bld->perquadi_bld.zero,
1430 "need_lerp");
1431 }
1432 else {
1433 /*
1434 * We'll do mip filtering if any of the quads need it.
1435 * It might be better to split the vectors here and only fetch/filter
1436 * quads which need it.
1437 */
1438 /*
1439 * We need to clamp lod_fpart here since we can get negative
1440 * values which would screw up filtering if not all
1441 * lod_fpart values have same sign.
1442 * We can however then skip the greater than comparison.
1443 */
1444 lod_fpart = lp_build_max(&bld->perquadi_bld, lod_fpart,
1445 bld->perquadi_bld.zero);
1446 need_lerp = lp_build_any_true_range(&bld->perquadi_bld, num_quads, lod_fpart);
1447 }
1448
1449 lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1450 {
1451 struct lp_build_context h16_bld;
1452
1453 lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16, bld->vector_width));
1454
1455 /* sample the second mipmap level */
1456 lp_build_mipmap_level_sizes(bld, ilevel1,
1457 &size1,
1458 &row_stride1_vec, &img_stride1_vec);
1459 lp_build_mipmap_level_sizes(bld, ilevel1,
1460 &size1,
1461 &row_stride1_vec, &img_stride1_vec);
1462 if (bld->num_lods == 1) {
1463 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1464 }
1465 else {
1466 data_ptr1 = bld->base_ptr;
1467 mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1468 }
1469
1470 if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
1471 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1472 lp_build_sample_image_nearest_afloat(bld,
1473 size1,
1474 row_stride1_vec, img_stride1_vec,
1475 data_ptr1, mipoff1, s, t, r,
1476 &colors1_lo, &colors1_hi);
1477 }
1478 else {
1479 lp_build_sample_image_linear_afloat(bld,
1480 size1,
1481 row_stride1_vec, img_stride1_vec,
1482 data_ptr1, mipoff1, s, t, r,
1483 &colors1_lo, &colors1_hi);
1484 }
1485 }
1486 else {
1487 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1488 lp_build_sample_image_nearest(bld,
1489 size1,
1490 row_stride1_vec, img_stride1_vec,
1491 data_ptr1, mipoff1, s, t, r,
1492 &colors1_lo, &colors1_hi);
1493 }
1494 else {
1495 lp_build_sample_image_linear(bld,
1496 size1,
1497 row_stride1_vec, img_stride1_vec,
1498 data_ptr1, mipoff1, s, t, r,
1499 &colors1_lo, &colors1_hi);
1500 }
1501 }
1502
1503 /* interpolate samples from the two mipmap levels */
1504
1505 if (num_quads == 1) {
1506 lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, "");
1507 lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart);
1508
1509 #if HAVE_LLVM == 0x208
1510 /* This is a work-around for a bug in LLVM 2.8.
1511 * Evidently, something goes wrong in the construction of the
1512 * lod_fpart short[8] vector. Adding this no-effect shuffle seems
1513 * to force the vector to be properly constructed.
1514 * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
1515 */
1516 {
1517 LLVMValueRef shuffles[8], shuffle;
1518 assert(h16_bld.type.length <= Elements(shuffles));
1519 for (i = 0; i < h16_bld.type.length; i++)
1520 shuffles[i] = lp_build_const_int32(bld->gallivm, 2 * (i & 1));
1521 shuffle = LLVMConstVector(shuffles, h16_bld.type.length);
1522 lod_fpart = LLVMBuildShuffleVector(builder,
1523 lod_fpart, lod_fpart,
1524 shuffle, "");
1525 }
1526 #endif
1527
1528 colors0_lo = lp_build_lerp(&h16_bld, lod_fpart,
1529 colors0_lo, colors1_lo);
1530 colors0_hi = lp_build_lerp(&h16_bld, lod_fpart,
1531 colors0_hi, colors1_hi);
1532 }
1533 else {
1534 LLVMValueRef lod_parts[LP_MAX_VECTOR_LENGTH/16];
1535 struct lp_type perquadi16_type = bld->perquadi_bld.type;
1536 perquadi16_type.width /= 2;
1537 perquadi16_type.length *= 2;
1538 lod_fpart = LLVMBuildBitCast(builder, lod_fpart,
1539 lp_build_vec_type(bld->gallivm,
1540 perquadi16_type), "");
1541 /* XXX this only works for exactly 2 quads. More quads need shuffle */
1542 assert(num_quads == 2);
1543 for (i = 0; i < num_quads; i++) {
1544 LLVMValueRef indexi2 = lp_build_const_int32(bld->gallivm, i*2);
1545 lod_parts[i] = lp_build_extract_broadcast(bld->gallivm,
1546 perquadi16_type,
1547 h16_bld.type,
1548 lod_fpart,
1549 indexi2);
1550 }
1551 colors0_lo = lp_build_lerp(&h16_bld, lod_parts[0],
1552 colors0_lo, colors1_lo);
1553 colors0_hi = lp_build_lerp(&h16_bld, lod_parts[1],
1554 colors0_hi, colors1_hi);
1555 }
1556
1557 LLVMBuildStore(builder, colors0_lo, colors_lo_var);
1558 LLVMBuildStore(builder, colors0_hi, colors_hi_var);
1559 }
1560 lp_build_endif(&if_ctx);
1561 }
1562 }
1563
1564
1565
/**
 * Texture sampling in AoS format. Used when sampling common 32-bit/texel
 * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes
 * but only limited texture coord wrap modes.
 *
 * Results are produced as two 8.8 fixed-point vector halves which are
 * packed to unorm8, converted to SoA floats and swizzled into texel_out[4].
 */
void
lp_build_sample_aos(struct lp_build_sample_context *bld,
                    unsigned unit,
                    LLVMValueRef s,
                    LLVMValueRef t,
                    LLVMValueRef r,
                    LLVMValueRef lod_ipart,
                    LLVMValueRef lod_fpart,
                    LLVMValueRef ilevel0,
                    LLVMValueRef ilevel1,
                    LLVMValueRef texel_out[4])
{
   struct lp_build_context *int_bld = &bld->int_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   const unsigned mip_filter = bld->static_state->min_mip_filter;
   const unsigned min_filter = bld->static_state->min_img_filter;
   const unsigned mag_filter = bld->static_state->mag_img_filter;
   const unsigned dims = bld->dims;
   LLVMValueRef packed, packed_lo, packed_hi;
   LLVMValueRef unswizzled[4];
   struct lp_build_context h16_bld;

   /* we only support the common/simple wrap modes at this time */
   assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
   if (dims >= 2)
      assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t));
   if (dims >= 3)
      assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r));


   /* make 16-bit fixed-pt builder context */
   lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16, bld->vector_width));

   /*
    * Get/interpolate texture colors.
    */

   /* alloca'd variables so lp_build_sample_mipmap() can store its results
    * from inside generated control flow */
   packed_lo = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_lo");
   packed_hi = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_hi");

   if (min_filter == mag_filter) {
      /* no need to distinguish between minification and magnification */
      lp_build_sample_mipmap(bld,
                             min_filter, mip_filter,
                             s, t, r,
                             ilevel0, ilevel1, lod_fpart,
                             packed_lo, packed_hi);
   }
   else {
      /* Emit conditional to choose min image filter or mag image filter
       * depending on the lod being > 0 or <= 0, respectively.
       */
      struct lp_build_if_state if_ctx;
      LLVMValueRef minify;

      /*
       * XXX this should take all lods into account; if some are min and
       * some max we probably could hack up the coords/weights in the linear
       * path with selects to work for nearest.
       * If that's just two quads sitting next to each other it seems
       * quite ok to do the same filtering method on both though, at
       * least unless we have explicit lod (and who uses different
       * min/mag filter with that?)
       */
      if (bld->num_lods > 1)
         lod_ipart = LLVMBuildExtractElement(builder, lod_ipart,
                                             lp_build_const_int32(bld->gallivm, 0), "");

      /* minify = lod >= 0.0 (integer lod compared against 0) */
      minify = LLVMBuildICmp(builder, LLVMIntSGE,
                             lod_ipart, int_bld->zero, "");

      lp_build_if(&if_ctx, bld->gallivm, minify);
      {
         /* Use the minification filter */
         lp_build_sample_mipmap(bld,
                                min_filter, mip_filter,
                                s, t, r,
                                ilevel0, ilevel1, lod_fpart,
                                packed_lo, packed_hi);
      }
      lp_build_else(&if_ctx);
      {
         /* Use the magnification filter (no mip filtering when magnifying) */
         lp_build_sample_mipmap(bld,
                                mag_filter, PIPE_TEX_MIPFILTER_NONE,
                                s, t, r,
                                ilevel0, NULL, NULL,
                                packed_lo, packed_hi);
      }
      lp_build_endif(&if_ctx);
   }

   /*
    * combine the values stored in 'packed_lo' and 'packed_hi' variables
    * into 'packed'
    */
   packed = lp_build_pack2(bld->gallivm,
                           h16_bld.type, lp_type_unorm(8, bld->vector_width),
                           LLVMBuildLoad(builder, packed_lo, ""),
                           LLVMBuildLoad(builder, packed_hi, ""));

   /*
    * Convert to SoA and swizzle.
    */
   lp_build_rgba8_to_f32_soa(bld->gallivm,
                             bld->texel_type,
                             packed, unswizzled);

   if (util_format_is_rgba8_variant(bld->format_desc)) {
      /* rgba8-like formats: apply the format's channel swizzle */
      lp_build_format_swizzle_soa(bld->format_desc,
                                  &bld->texel_bld,
                                  unswizzled, texel_out);
   }
   else {
      texel_out[0] = unswizzled[0];
      texel_out[1] = unswizzled[1];
      texel_out[2] = unswizzled[2];
      texel_out[3] = unswizzled[3];
   }
}