src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2010 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * Texture sampling -- AoS.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  * @author Brian Paul <brianp@vmware.com>
  34  */
  35
  36 #include "pipe/p_defines.h"
  37 #include "pipe/p_state.h"
  38 #include "util/u_debug.h"
  39 #include "util/u_dump.h"
  40 #include "util/u_memory.h"
  41 #include "util/u_math.h"
  42 #include "util/u_format.h"
  43 #include "util/u_cpu_detect.h"
  44 #include "lp_bld_debug.h"
  45 #include "lp_bld_type.h"
  46 #include "lp_bld_const.h"
  47 #include "lp_bld_conv.h"
  48 #include "lp_bld_arit.h"
  49 #include "lp_bld_bitarit.h"
  50 #include "lp_bld_logic.h"
  51 #include "lp_bld_swizzle.h"
  52 #include "lp_bld_pack.h"
  53 #include "lp_bld_flow.h"
  54 #include "lp_bld_gather.h"
  55 #include "lp_bld_format.h"
  56 #include "lp_bld_init.h"
  57 #include "lp_bld_sample.h"
  58 #include "lp_bld_sample_aos.h"
  59 #include "lp_bld_quad.h"
  60
  61
  62 /**
  63  * Build LLVM code for texture coord wrapping, for nearest filtering,
  64  * for scaled integer texcoords.
  65  * \param block_length  is the length of the pixel block along the
  66  *                      coordinate axis
  67  * \param coord  the incoming texcoord (s,t or r) scaled to the texture size
  68  * \param coord_f  the incoming texcoord (s,t or r) as float vec
  69  * \param length  the texture size along one dimension
  70  * \param stride  pixel stride along the coordinate axis (in bytes)
  71  * \param offset  the texel offset along the coord axis
  72  * \param is_pot  if TRUE, length is a power of two
  73  * \param wrap_mode  one of PIPE_TEX_WRAP_x
  74  * \param out_offset  byte offset for the wrapped coordinate
  75  * \param out_i  resulting sub-block pixel coordinate for coord0
  76  */
  77 static void
  78 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
  79                                  unsigned block_length,
  80                                  LLVMValueRef coord,
  81                                  LLVMValueRef coord_f,
  82                                  LLVMValueRef length,
  83                                  LLVMValueRef stride,
  84                                  LLVMValueRef offset,
  85                                  boolean is_pot,
  86                                  unsigned wrap_mode,
  87                                  LLVMValueRef *out_offset,
  88                                  LLVMValueRef *out_i)
  89 {
  90    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
  91    LLVMBuilderRef builder = bld->gallivm->builder;
  92    LLVMValueRef length_minus_one;
  93
  94    length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
  95
  96    switch(wrap_mode) {
  97    case PIPE_TEX_WRAP_REPEAT:
  98       if(is_pot)
  99          coord = LLVMBuildAnd(builder, coord, length_minus_one, "");
 100       else {
 101          struct lp_build_context *coord_bld = &bld->coord_bld;
 102          LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
 103          if (offset) {
 104             offset = lp_build_int_to_float(coord_bld, offset);
 105             offset = lp_build_div(coord_bld, offset, length_f);
 106             coord_f = lp_build_add(coord_bld, coord_f, offset);
 107          }
 108          coord = lp_build_fract_safe(coord_bld, coord_f);
 109          coord = lp_build_mul(coord_bld, coord, length_f);
 110          coord = lp_build_itrunc(coord_bld, coord);
 111       }
 112       break;
 113
 114    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 115       coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
 116       coord = lp_build_min(int_coord_bld, coord, length_minus_one);
 117       break;
 118
 119    case PIPE_TEX_WRAP_CLAMP:
 120    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 121    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 122    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 123    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 124    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 125    default:
 126       assert(0);
 127    }
 128
 129    lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
 130                                   out_offset, out_i);
 131 }
 132
 133
 134 /**
 135  * Build LLVM code for texture coord wrapping, for nearest filtering,
 136  * for float texcoords.
 137  * \param coord  the incoming texcoord (s,t or r)
 138  * \param length  the texture size along one dimension
 139  * \param offset  the texel offset along the coord axis
 140  * \param is_pot  if TRUE, length is a power of two
 141  * \param wrap_mode  one of PIPE_TEX_WRAP_x
 142  * \param icoord  the texcoord after wrapping, as int
 143  */
 144 static void
 145 lp_build_sample_wrap_nearest_float(struct lp_build_sample_context *bld,
 146                                    LLVMValueRef coord,
 147                                    LLVMValueRef length,
 148                                    LLVMValueRef offset,
 149                                    boolean is_pot,
 150                                    unsigned wrap_mode,
 151                                    LLVMValueRef *icoord)
 152 {
 153    struct lp_build_context *coord_bld = &bld->coord_bld;
 154    LLVMValueRef length_minus_one;
 155
 156    switch(wrap_mode) {
 157    case PIPE_TEX_WRAP_REPEAT:
 158       if (offset) {
 159          /* this is definitely not ideal for POT case */
 160          offset = lp_build_int_to_float(coord_bld, offset);
 161          offset = lp_build_div(coord_bld, offset, length);
 162          coord = lp_build_add(coord_bld, coord, offset);
 163       }
 164       /* take fraction, unnormalize */
 165       coord = lp_build_fract_safe(coord_bld, coord);
 166       coord = lp_build_mul(coord_bld, coord, length);
 167       *icoord = lp_build_itrunc(coord_bld, coord);
 168       break;
 169    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 170       length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);
 171       if (bld->static_sampler_state->normalized_coords) {
 172          /* scale coord to length */
 173          coord = lp_build_mul(coord_bld, coord, length);
 174       }
 175       if (offset) {
 176          offset = lp_build_int_to_float(coord_bld, offset);
 177          coord = lp_build_add(coord_bld, coord, offset);
 178       }
 179       coord = lp_build_clamp(coord_bld, coord, coord_bld->zero,
 180                              length_minus_one);
 181       *icoord = lp_build_itrunc(coord_bld, coord);
 182       break;
 183
 184    case PIPE_TEX_WRAP_CLAMP:
 185    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 186    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 187    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 188    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 189    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 190    default:
 191       assert(0);
 192    }
 193 }
 194
 195
 196 /**
 197  * Helper to compute the first coord and the weight for
 198  * linear wrap repeat npot textures
 199  */
 200 static void
 201 lp_build_coord_repeat_npot_linear_int(struct lp_build_sample_context *bld,
 202                                       LLVMValueRef coord_f,
 203                                       LLVMValueRef length_i,
 204                                       LLVMValueRef length_f,
 205                                       LLVMValueRef *coord0_i,
 206                                       LLVMValueRef *weight_i)
 207 {
 208    struct lp_build_context *coord_bld = &bld->coord_bld;
 209    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 210    struct lp_build_context abs_coord_bld;
 211    struct lp_type abs_type;
 212    LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
 213                                                 int_coord_bld->one);
 214    LLVMValueRef mask, i32_c8, i32_c128, i32_c255;
 215
 216    /* wrap with normalized floats is just fract */
 217    coord_f = lp_build_fract(coord_bld, coord_f);
 218    /* mul by size */
 219    coord_f = lp_build_mul(coord_bld, coord_f, length_f);
 220    /* convert to int, compute lerp weight */
 221    coord_f = lp_build_mul_imm(&bld->coord_bld, coord_f, 256);
 222
 223    /* At this point we don't have any negative numbers so use non-signed
 224     * build context which might help on some archs.
 225     */
 226    abs_type = coord_bld->type;
 227    abs_type.sign = 0;
 228    lp_build_context_init(&abs_coord_bld, bld->gallivm, abs_type);
 229    *coord0_i = lp_build_iround(&abs_coord_bld, coord_f);
 230
 231    /* subtract 0.5 (add -128) */
 232    i32_c128 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, -128);
 233    *coord0_i = LLVMBuildAdd(bld->gallivm->builder, *coord0_i, i32_c128, "");
 234
 235    /* compute fractional part (AND with 0xff) */
 236    i32_c255 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 255);
 237    *weight_i = LLVMBuildAnd(bld->gallivm->builder, *coord0_i, i32_c255, "");
 238
 239    /* compute floor (shift right 8) */
 240    i32_c8 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 8);
 241    *coord0_i = LLVMBuildAShr(bld->gallivm->builder, *coord0_i, i32_c8, "");
 242    /*
 243     * we avoided the 0.5/length division before the repeat wrap,
 244     * now need to fix up edge cases with selects
 245     */
 246    mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
 247                            PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
 248    *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
 249    /*
 250     * We should never get values too large - except if coord was nan or inf,
 251     * in which case things go terribly wrong...
 252     * Alternatively, could use fract_safe above...
 253     */
 254    *coord0_i = lp_build_min(int_coord_bld, *coord0_i, length_minus_one);
 255 }
 256
 257
 258 /**
 259  * Build LLVM code for texture coord wrapping, for linear filtering,
 260  * for scaled integer texcoords.
 261  * \param block_length  is the length of the pixel block along the
 262  *                      coordinate axis
 263  * \param coord0  the incoming texcoord (s,t or r) scaled to the texture size
 264  * \param coord_f  the incoming texcoord (s,t or r) as float vec
 265  * \param length  the texture size along one dimension
 266  * \param stride  pixel stride along the coordinate axis (in bytes)
 267  * \param offset  the texel offset along the coord axis
 268  * \param is_pot  if TRUE, length is a power of two
 269  * \param wrap_mode  one of PIPE_TEX_WRAP_x
 270  * \param offset0  resulting relative offset for coord0
 271  * \param offset1  resulting relative offset for coord0 + 1
 272  * \param i0  resulting sub-block pixel coordinate for coord0
 273  * \param i1  resulting sub-block pixel coordinate for coord0 + 1
 274  */
 275 static void
 276 lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
 277                                 unsigned block_length,
 278                                 LLVMValueRef coord0,
 279                                 LLVMValueRef *weight_i,
 280                                 LLVMValueRef coord_f,
 281                                 LLVMValueRef length,
 282                                 LLVMValueRef stride,
 283                                 LLVMValueRef offset,
 284                                 boolean is_pot,
 285                                 unsigned wrap_mode,
 286                                 LLVMValueRef *offset0,
 287                                 LLVMValueRef *offset1,
 288                                 LLVMValueRef *i0,
 289                                 LLVMValueRef *i1)
 290 {
 291    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 292    LLVMBuilderRef builder = bld->gallivm->builder;
 293    LLVMValueRef length_minus_one;
 294    LLVMValueRef lmask, umask, mask;
 295
 296    /*
 297     * If the pixel block covers more than one pixel then there is no easy
 298     * way to calculate offset1 relative to offset0. Instead, compute them
 299     * independently. Otherwise, try to compute offset0 and offset1 with
 300     * a single stride multiplication.
 301     */
 302
 303    length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
 304
 305    if (block_length != 1) {
 306       LLVMValueRef coord1;
 307       switch(wrap_mode) {
 308       case PIPE_TEX_WRAP_REPEAT:
 309          if (is_pot) {
 310             coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 311             coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
 312             coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
 313          }
 314          else {
 315             LLVMValueRef mask;
 316             LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
 317             if (offset) {
 318                offset = lp_build_int_to_float(&bld->coord_bld, offset);
 319                offset = lp_build_div(&bld->coord_bld, offset, length_f);
 320                coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
 321             }
 322             lp_build_coord_repeat_npot_linear_int(bld, coord_f,
 323                                                   length, length_f,
 324                                                   &coord0, weight_i);
 325             mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
 326                                     PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
 327             coord1 = LLVMBuildAnd(builder,
 328                                   lp_build_add(int_coord_bld, coord0,
 329                                                int_coord_bld->one),
 330                                   mask, "");
 331          }
 332          break;
 333
 334       case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 335          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 336          coord0 = lp_build_clamp(int_coord_bld, coord0, int_coord_bld->zero,
 337                                 length_minus_one);
 338          coord1 = lp_build_clamp(int_coord_bld, coord1, int_coord_bld->zero,
 339                                 length_minus_one);
 340          break;
 341
 342       case PIPE_TEX_WRAP_CLAMP:
 343       case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 344       case PIPE_TEX_WRAP_MIRROR_REPEAT:
 345       case PIPE_TEX_WRAP_MIRROR_CLAMP:
 346       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 347       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 348       default:
 349          assert(0);
 350          coord0 = int_coord_bld->zero;
 351          coord1 = int_coord_bld->zero;
 352          break;
 353       }
 354       lp_build_sample_partial_offset(int_coord_bld, block_length, coord0, stride,
 355                                      offset0, i0);
 356       lp_build_sample_partial_offset(int_coord_bld, block_length, coord1, stride,
 357                                      offset1, i1);
 358       return;
 359    }
 360
 361    *i0 = int_coord_bld->zero;
 362    *i1 = int_coord_bld->zero;
 363
 364    switch(wrap_mode) {
 365    case PIPE_TEX_WRAP_REPEAT:
 366       if (is_pot) {
 367          coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
 368       }
 369       else {
 370          LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
 371          if (offset) {
 372             offset = lp_build_int_to_float(&bld->coord_bld, offset);
 373             offset = lp_build_div(&bld->coord_bld, offset, length_f);
 374             coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
 375          }
 376          lp_build_coord_repeat_npot_linear_int(bld, coord_f,
 377                                                length, length_f,
 378                                                &coord0, weight_i);
 379       }
 380
 381       mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
 382                               PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
 383
 384       *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
 385       *offset1 = LLVMBuildAnd(builder,
 386                               lp_build_add(int_coord_bld, *offset0, stride),
 387                               mask, "");
 388       break;
 389
 390    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 391       /* XXX this might be slower than the separate path
 392        * on some newer cpus. With sse41 this is 8 instructions vs. 7
 393        * - at least on SNB this is almost certainly slower since
 394        * min/max are cheaper than selects, and the muls aren't bad.
 395        */
 396       lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
 397                                PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
 398       umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
 399                                PIPE_FUNC_LESS, coord0, length_minus_one);
 400
 401       coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
 402       coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
 403
 404       mask = LLVMBuildAnd(builder, lmask, umask, "");
 405
 406       *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
 407       *offset1 = lp_build_add(int_coord_bld,
 408                               *offset0,
 409                               LLVMBuildAnd(builder, stride, mask, ""));
 410       break;
 411
 412    case PIPE_TEX_WRAP_CLAMP:
 413    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 414    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 415    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 416    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 417    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 418    default:
 419       assert(0);
 420       *offset0 = int_coord_bld->zero;
 421       *offset1 = int_coord_bld->zero;
 422       break;
 423    }
 424 }
 425
 426
 427 /**
 428  * Build LLVM code for texture coord wrapping, for linear filtering,
 429  * for float texcoords.
 430  * \param block_length  is the length of the pixel block along the
 431  *                      coordinate axis
 432  * \param coord  the incoming texcoord (s,t or r)
 433  * \param length  the texture size along one dimension
 434  * \param offset  the texel offset along the coord axis
 435  * \param is_pot  if TRUE, length is a power of two
 436  * \param wrap_mode  one of PIPE_TEX_WRAP_x
 437  * \param coord0  the first texcoord after wrapping, as int
 438  * \param coord1  the second texcoord after wrapping, as int
 439  * \param weight  the filter weight as int (0-255)
 440  * \param force_nearest  if this coord actually uses nearest filtering
 441  */
 442 static void
 443 lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld,
 444                                   unsigned block_length,
 445                                   LLVMValueRef coord,
 446                                   LLVMValueRef length,
 447                                   LLVMValueRef offset,
 448                                   boolean is_pot,
 449                                   unsigned wrap_mode,
 450                                   LLVMValueRef *coord0,
 451                                   LLVMValueRef *coord1,
 452                                   LLVMValueRef *weight,
 453                                   unsigned force_nearest)
 454 {
 455    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 456    struct lp_build_context *coord_bld = &bld->coord_bld;
 457    LLVMBuilderRef builder = bld->gallivm->builder;
 458    LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
 459    LLVMValueRef length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);
 460
 461    switch(wrap_mode) {
 462    case PIPE_TEX_WRAP_REPEAT:
 463       if (is_pot) {
 464          /* mul by size and subtract 0.5 */
 465          coord = lp_build_mul(coord_bld, coord, length);
 466          if (offset) {
 467             offset = lp_build_int_to_float(coord_bld, offset);
 468             coord = lp_build_add(coord_bld, coord, offset);
 469          }
 470          if (!force_nearest)
 471             coord = lp_build_sub(coord_bld, coord, half);
 472          *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
 473          /* convert to int, compute lerp weight */
 474          lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
 475          *coord1 = lp_build_ifloor(coord_bld, *coord1);
 476          /* repeat wrap */
 477          length_minus_one = lp_build_itrunc(coord_bld, length_minus_one);
 478          *coord0 = LLVMBuildAnd(builder, *coord0, length_minus_one, "");
 479          *coord1 = LLVMBuildAnd(builder, *coord1, length_minus_one, "");
 480       }
 481       else {
 482          LLVMValueRef mask;
 483          if (offset) {
 484             offset = lp_build_int_to_float(coord_bld, offset);
 485             offset = lp_build_div(coord_bld, offset, length);
 486             coord = lp_build_add(coord_bld, coord, offset);
 487          }
 488          /* wrap with normalized floats is just fract */
 489          coord = lp_build_fract(coord_bld, coord);
 490          /* unnormalize */
 491          coord = lp_build_mul(coord_bld, coord, length);
 492          /*
 493           * we avoided the 0.5/length division, have to fix up wrong
 494           * edge cases with selects
 495           */
 496          *coord1 = lp_build_add(coord_bld, coord, half);
 497          coord = lp_build_sub(coord_bld, coord, half);
 498          *weight = lp_build_fract(coord_bld, coord);
 499          /*
 500           * It is important for this comparison to be unordered
 501           * (or need fract_safe above).
 502           */
 503          mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
 504                                  PIPE_FUNC_LESS, coord, coord_bld->zero);
 505          *coord0 = lp_build_select(coord_bld, mask, length_minus_one, coord);
 506          *coord0 = lp_build_itrunc(coord_bld, *coord0);
 507          mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
 508                                  PIPE_FUNC_LESS, *coord1, length);
 509          *coord1 = lp_build_select(coord_bld, mask, *coord1, coord_bld->zero);
 510          *coord1 = lp_build_itrunc(coord_bld, *coord1);
 511       }
 512       break;
 513    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 514       if (bld->static_sampler_state->normalized_coords) {
 515          /* mul by tex size */
 516          coord = lp_build_mul(coord_bld, coord, length);
 517       }
 518       if (offset) {
 519          offset = lp_build_int_to_float(coord_bld, offset);
 520          coord = lp_build_add(coord_bld, coord, offset);
 521       }
 522       /* subtract 0.5 */
 523       if (!force_nearest) {
 524          coord = lp_build_sub(coord_bld, coord, half);
 525       }
 526       /* clamp to [0, length - 1] */
 527       coord = lp_build_min_ext(coord_bld, coord, length_minus_one,
 528                                GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
 529       coord = lp_build_max(coord_bld, coord, coord_bld->zero);
 530       *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
 531       /* convert to int, compute lerp weight */
 532       lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
 533       /* coord1 = min(coord1, length-1) */
 534       *coord1 = lp_build_min(coord_bld, *coord1, length_minus_one);
 535       *coord1 = lp_build_itrunc(coord_bld, *coord1);
 536       break;
 537    default:
 538       assert(0);
 539       *coord0 = int_coord_bld->zero;
 540       *coord1 = int_coord_bld->zero;
 541       *weight = coord_bld->zero;
 542       break;
 543    }
 544    *weight = lp_build_mul_imm(coord_bld, *weight, 256);
 545    *weight = lp_build_itrunc(coord_bld, *weight);
 546    return;
 547 }
 548
 549
 550 /**
 551  * Fetch texels for image with nearest sampling.
 552  * Return filtered color as two vectors of 16-bit fixed point values.
 553  */
 554 static void
 555 lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
 556                                     LLVMValueRef data_ptr,
 557                                     LLVMValueRef offset,
 558                                     LLVMValueRef x_subcoord,
 559                                     LLVMValueRef y_subcoord,
 560                                     LLVMValueRef *colors)
 561 {
 562    /*
 563     * Fetch the pixels as 4 x 32bit (rgba order might differ):
 564     *
 565     *   rgba0 rgba1 rgba2 rgba3
 566     *
 567     * bit cast them into 16 x u8
 568     *
 569     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
 570     *
 571     * unpack them into two 8 x i16:
 572     *
 573     *   r0 g0 b0 a0 r1 g1 b1 a1
 574     *   r2 g2 b2 a2 r3 g3 b3 a3
 575     *
 576     * The higher 8 bits of the resulting elements will be zero.
 577     */
 578    LLVMBuilderRef builder = bld->gallivm->builder;
 579    LLVMValueRef rgba8;
 580    struct lp_build_context u8n;
 581    LLVMTypeRef u8n_vec_type;
 582
 583    lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
 584    u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
 585
 586    if (util_format_is_rgba8_variant(bld->format_desc)) {
 587       /*
 588        * Given the format is a rgba8, just read the pixels as is,
 589        * without any swizzling. Swizzling will be done later.
 590        */
 591       rgba8 = lp_build_gather(bld->gallivm,
 592                               bld->texel_type.length,
 593                               bld->format_desc->block.bits,
 594                               bld->texel_type.width,
 595                               TRUE,
 596                               data_ptr, offset, TRUE);
 597
 598       rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
 599    }
 600    else {
 601       rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
 602                                       bld->format_desc,
 603                                       u8n.type,
 604                                       TRUE,
 605                                       data_ptr, offset,
 606                                       x_subcoord,
 607                                       y_subcoord,
 608                                       bld->cache);
 609    }
 610
 611    *colors = rgba8;
 612 }
 613
 614
 615 /**
 616  * Sample a single texture image with nearest sampling.
 617  * If sampling a cube texture, r = cube face in [0,5].
 618  * Return filtered color as two vectors of 16-bit fixed point values.
 619  */
 620 static void
 621 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
 622                               LLVMValueRef int_size,
 623                               LLVMValueRef row_stride_vec,
 624                               LLVMValueRef img_stride_vec,
 625                               LLVMValueRef data_ptr,
 626                               LLVMValueRef mipoffsets,
 627                               LLVMValueRef s,
 628                               LLVMValueRef t,
 629                               LLVMValueRef r,
 630                               const LLVMValueRef *offsets,
 631                               LLVMValueRef *colors)
 632 {
 633    const unsigned dims = bld->dims;
 634    struct lp_build_context i32;
 635    LLVMValueRef width_vec, height_vec, depth_vec;
 636    LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL;
 637    LLVMValueRef s_float, t_float = NULL, r_float = NULL;
 638    LLVMValueRef x_stride;
 639    LLVMValueRef x_offset, offset;
 640    LLVMValueRef x_subcoord, y_subcoord, z_subcoord;
 641
 642    lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));
 643
 644    lp_build_extract_image_sizes(bld,
 645                                 &bld->int_size_bld,
 646                                 bld->int_coord_type,
 647                                 int_size,
 648                                 &width_vec,
 649                                 &height_vec,
 650                                 &depth_vec);
 651
 652    s_float = s; t_float = t; r_float = r;
 653
 654    if (bld->static_sampler_state->normalized_coords) {
 655       LLVMValueRef flt_size;
 656
 657       flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);
 658
 659       lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
 660    }
 661
 662    /* convert float to int */
 663    /* For correct rounding, need floor, not truncation here.
 664     * Note that in some cases (clamp to edge, no texel offsets) we
 665     * could use a non-signed build context which would help archs
 666     * greatly which don't have arch rounding.
 667     */
 668    s_ipart = lp_build_ifloor(&bld->coord_bld, s);
 669    if (dims >= 2)
 670       t_ipart = lp_build_ifloor(&bld->coord_bld, t);
 671    if (dims >= 3)
 672       r_ipart = lp_build_ifloor(&bld->coord_bld, r);
 673
 674    /* add texel offsets */
 675    if (offsets[0]) {
 676       s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
 677       if (dims >= 2) {
 678          t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
 679          if (dims >= 3) {
 680             r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
 681          }
 682       }
 683    }
 684
 685    /* get pixel, row, image strides */
 686    x_stride = lp_build_const_vec(bld->gallivm,
 687                                  bld->int_coord_bld.type,
 688                                  bld->format_desc->block.bits/8);
 689
 690    /* Do texcoord wrapping, compute texel offset */
 691    lp_build_sample_wrap_nearest_int(bld,
 692                                     bld->format_desc->block.width,
 693                                     s_ipart, s_float,
 694                                     width_vec, x_stride, offsets[0],
 695                                     bld->static_texture_state->pot_width,
 696                                     bld->static_sampler_state->wrap_s,
 697                                     &x_offset, &x_subcoord);
 698    offset = x_offset;
 699    if (dims >= 2) {
 700       LLVMValueRef y_offset;
 701       lp_build_sample_wrap_nearest_int(bld,
 702                                        bld->format_desc->block.height,
 703                                        t_ipart, t_float,
 704                                        height_vec, row_stride_vec, offsets[1],
 705                                        bld->static_texture_state->pot_height,
 706                                        bld->static_sampler_state->wrap_t,
 707                                        &y_offset, &y_subcoord);
 708       offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
 709       if (dims >= 3) {
 710          LLVMValueRef z_offset;
 711          lp_build_sample_wrap_nearest_int(bld,
 712                                           1, /* block length (depth) */
 713                                           r_ipart, r_float,
 714                                           depth_vec, img_stride_vec, offsets[2],
 715                                           bld->static_texture_state->pot_depth,
 716                                           bld->static_sampler_state->wrap_r,
 717                                           &z_offset, &z_subcoord);
 718          offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
 719       }
 720    }
 721    if (has_layer_coord(bld->static_texture_state->target)) {
 722       LLVMValueRef z_offset;
 723       /* The r coord is the cube face in [0,5] or array layer */
 724       z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
 725       offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
 726    }
 727    if (mipoffsets) {
 728       offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
 729    }
 730
 731    lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
 732                                        x_subcoord, y_subcoord,
 733                                        colors);
 734 }
 735
 736
 737 /**
 738  * Sample a single texture image with nearest sampling.
 739  * If sampling a cube texture, r = cube face in [0,5].
 740  * Return filtered color as two vectors of 16-bit fixed point values.
 741  * Does address calcs (except offsets) with floats.
 742  * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
 743  */
 744 static void
 745 lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld,
 746                                      LLVMValueRef int_size,
 747                                      LLVMValueRef row_stride_vec,
 748                                      LLVMValueRef img_stride_vec,
 749                                      LLVMValueRef data_ptr,
 750                                      LLVMValueRef mipoffsets,
 751                                      LLVMValueRef s,
 752                                      LLVMValueRef t,
 753                                      LLVMValueRef r,
 754                                      const LLVMValueRef *offsets,
 755                                      LLVMValueRef *colors)
 756    {
 757    const unsigned dims = bld->dims;
 758    LLVMValueRef width_vec, height_vec, depth_vec;
 759    LLVMValueRef offset;
 760    LLVMValueRef x_subcoord, y_subcoord;
 761    LLVMValueRef x_icoord = NULL, y_icoord = NULL, z_icoord = NULL;
 762    LLVMValueRef flt_size;
 763
 764    flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);
 765
 766    lp_build_extract_image_sizes(bld,
 767                                 &bld->float_size_bld,
 768                                 bld->coord_type,
 769                                 flt_size,
 770                                 &width_vec,
 771                                 &height_vec,
 772                                 &depth_vec);
 773
 774    /* Do texcoord wrapping */
 775    lp_build_sample_wrap_nearest_float(bld,
 776                                       s, width_vec, offsets[0],
 777                                       bld->static_texture_state->pot_width,
 778                                       bld->static_sampler_state->wrap_s,
 779                                       &x_icoord);
 780
 781    if (dims >= 2) {
 782       lp_build_sample_wrap_nearest_float(bld,
 783                                          t, height_vec, offsets[1],
 784                                          bld->static_texture_state->pot_height,
 785                                          bld->static_sampler_state->wrap_t,
 786                                          &y_icoord);
 787
 788       if (dims >= 3) {
 789          lp_build_sample_wrap_nearest_float(bld,
 790                                             r, depth_vec, offsets[2],
 791                                             bld->static_texture_state->pot_depth,
 792                                             bld->static_sampler_state->wrap_r,
 793                                             &z_icoord);
 794       }
 795    }
 796    if (has_layer_coord(bld->static_texture_state->target)) {
 797       z_icoord = r;
 798    }
 799
 800    /*
 801     * From here on we deal with ints, and we should split up the 256bit
 802     * vectors manually for better generated code.
 803     */
 804
 805    /*
 806     * compute texel offsets -
 807     * cannot do offset calc with floats, difficult for block-based formats,
 808     * and not enough precision anyway.
 809     */
 810    lp_build_sample_offset(&bld->int_coord_bld,
 811                           bld->format_desc,
 812                           x_icoord, y_icoord,
 813                           z_icoord,
 814                           row_stride_vec, img_stride_vec,
 815                           &offset,
 816                           &x_subcoord, &y_subcoord);
 817    if (mipoffsets) {
 818       offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
 819    }
 820
 821    lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
 822                                        x_subcoord, y_subcoord,
 823                                        colors);
 824 }
 825
 826
 827 /**
 828  * Fetch texels for image with linear sampling.
 829  * Return filtered color as two vectors of 16-bit fixed point values.
 830  */
 831 static void
 832 lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
 833                                    LLVMValueRef data_ptr,
 834                                    LLVMValueRef offset[2][2][2],
 835                                    LLVMValueRef x_subcoord[2],
 836                                    LLVMValueRef y_subcoord[2],
 837                                    LLVMValueRef s_fpart,
 838                                    LLVMValueRef t_fpart,
 839                                    LLVMValueRef r_fpart,
 840                                    LLVMValueRef *colors)
 841 {
 842    const unsigned dims = bld->dims;
 843    LLVMBuilderRef builder = bld->gallivm->builder;
 844    struct lp_build_context u8n;
 845    LLVMTypeRef u8n_vec_type;
 846    LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
 847    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
 848    LLVMValueRef shuffle;
 849    LLVMValueRef neighbors[2][2][2]; /* [z][y][x] */
 850    LLVMValueRef packed;
 851    unsigned i, j, k;
 852    unsigned numj, numk;
 853
 854    lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
 855    u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
 856
 857    /*
 858     * Transform 4 x i32 in
 859     *
 860     *   s_fpart = {s0, s1, s2, s3}
 861     *
 862     * where each value is between 0 and 0xff,
 863     *
 864     * into one 16 x i20
 865     *
 866     *   s_fpart = {s0, s0, s0, s0, s1, s1, s1, s1, s2, s2, s2, s2, s3, s3, s3, s3}
 867     *
 868     * and likewise for t_fpart. There is no risk of loosing precision here
 869     * since the fractional parts only use the lower 8bits.
 870     */
 871    s_fpart = LLVMBuildBitCast(builder, s_fpart, u8n_vec_type, "");
 872    if (dims >= 2)
 873       t_fpart = LLVMBuildBitCast(builder, t_fpart, u8n_vec_type, "");
 874    if (dims >= 3)
 875       r_fpart = LLVMBuildBitCast(builder, r_fpart, u8n_vec_type, "");
 876
 877    for (j = 0; j < u8n.type.length; j += 4) {
 878 #ifdef PIPE_ARCH_LITTLE_ENDIAN
 879       unsigned subindex = 0;
 880 #else
 881       unsigned subindex = 3;
 882 #endif
 883       LLVMValueRef index;
 884
 885       index = LLVMConstInt(elem_type, j + subindex, 0);
 886       for (i = 0; i < 4; ++i)
 887          shuffles[j + i] = index;
 888    }
 889
 890    shuffle = LLVMConstVector(shuffles, u8n.type.length);
 891
 892    s_fpart = LLVMBuildShuffleVector(builder, s_fpart, u8n.undef,
 893                                     shuffle, "");
 894    if (dims >= 2) {
 895       t_fpart = LLVMBuildShuffleVector(builder, t_fpart, u8n.undef,
 896                                        shuffle, "");
 897    }
 898    if (dims >= 3) {
 899       r_fpart = LLVMBuildShuffleVector(builder, r_fpart, u8n.undef,
 900                                        shuffle, "");
 901    }
 902
 903    /*
 904     * Fetch the pixels as 4 x 32bit (rgba order might differ):
 905     *
 906     *   rgba0 rgba1 rgba2 rgba3
 907     *
 908     * bit cast them into 16 x u8
 909     *
 910     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
 911     *
 912     * unpack them into two 8 x i16:
 913     *
 914     *   r0 g0 b0 a0 r1 g1 b1 a1
 915     *   r2 g2 b2 a2 r3 g3 b3 a3
 916     *
 917     * The higher 8 bits of the resulting elements will be zero.
 918     */
 919    numj = 1 + (dims >= 2);
 920    numk = 1 + (dims >= 3);
 921
 922    for (k = 0; k < numk; k++) {
 923       for (j = 0; j < numj; j++) {
 924          for (i = 0; i < 2; i++) {
 925             LLVMValueRef rgba8;
 926
 927             if (util_format_is_rgba8_variant(bld->format_desc)) {
 928                /*
 929                 * Given the format is a rgba8, just read the pixels as is,
 930                 * without any swizzling. Swizzling will be done later.
 931                 */
 932                rgba8 = lp_build_gather(bld->gallivm,
 933                                        bld->texel_type.length,
 934                                        bld->format_desc->block.bits,
 935                                        bld->texel_type.width,
 936                                        TRUE,
 937                                        data_ptr, offset[k][j][i], TRUE);
 938
 939                rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
 940             }
 941             else {
 942                rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
 943                                                bld->format_desc,
 944                                                u8n.type,
 945                                                TRUE,
 946                                                data_ptr, offset[k][j][i],
 947                                                x_subcoord[i],
 948                                                y_subcoord[j],
 949                                                bld->cache);
 950             }
 951
 952             neighbors[k][j][i] = rgba8;
 953          }
 954       }
 955    }
 956
 957    /*
 958     * Linear interpolation with 8.8 fixed point.
 959     */
 960    if (bld->static_sampler_state->force_nearest_s) {
 961       /* special case 1-D lerp */
 962       packed = lp_build_lerp(&u8n,
 963                              t_fpart,
 964                              neighbors[0][0][0],
 965                              neighbors[0][0][1],
 966                              LP_BLD_LERP_PRESCALED_WEIGHTS);
 967    }
 968    else if (bld->static_sampler_state->force_nearest_t) {
 969       /* special case 1-D lerp */
 970       packed = lp_build_lerp(&u8n,
 971                              s_fpart,
 972                              neighbors[0][0][0],
 973                              neighbors[0][0][1],
 974                              LP_BLD_LERP_PRESCALED_WEIGHTS);
 975    }
 976    else {
 977       /* general 1/2/3-D lerping */
 978       if (dims == 1) {
 979          packed = lp_build_lerp(&u8n,
 980                                 s_fpart,
 981                                 neighbors[0][0][0],
 982                                 neighbors[0][0][1],
 983                                 LP_BLD_LERP_PRESCALED_WEIGHTS);
 984       } else if (dims == 2) {
 985          /* 2-D lerp */
 986          packed = lp_build_lerp_2d(&u8n,
 987                                    s_fpart, t_fpart,
 988                                    neighbors[0][0][0],
 989                                    neighbors[0][0][1],
 990                                    neighbors[0][1][0],
 991                                    neighbors[0][1][1],
 992                                    LP_BLD_LERP_PRESCALED_WEIGHTS);
 993       } else {
 994          /* 3-D lerp */
 995          assert(dims == 3);
 996          packed = lp_build_lerp_3d(&u8n,
 997                                    s_fpart, t_fpart, r_fpart,
 998                                    neighbors[0][0][0],
 999                                    neighbors[0][0][1],
1000                                    neighbors[0][1][0],
1001                                    neighbors[0][1][1],
1002                                    neighbors[1][0][0],
1003                                    neighbors[1][0][1],
1004                                    neighbors[1][1][0],
1005                                    neighbors[1][1][1],
1006                                    LP_BLD_LERP_PRESCALED_WEIGHTS);
1007       }
1008    }
1009
1010    *colors = packed;
1011 }
1012
1013 /**
1014  * Sample a single texture image with (bi-)(tri-)linear sampling.
1015  * Return filtered color as two vectors of 16-bit fixed point values.
1016  */
1017 static void
1018 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
1019                              LLVMValueRef int_size,
1020                              LLVMValueRef row_stride_vec,
1021                              LLVMValueRef img_stride_vec,
1022                              LLVMValueRef data_ptr,
1023                              LLVMValueRef mipoffsets,
1024                              LLVMValueRef s,
1025                              LLVMValueRef t,
1026                              LLVMValueRef r,
1027                              const LLVMValueRef *offsets,
1028                              LLVMValueRef *colors)
1029 {
1030    const unsigned dims = bld->dims;
1031    LLVMBuilderRef builder = bld->gallivm->builder;
1032    struct lp_build_context i32;
1033    LLVMValueRef i32_c8, i32_c128, i32_c255;
1034    LLVMValueRef width_vec, height_vec, depth_vec;
1035    LLVMValueRef s_ipart, s_fpart, s_float;
1036    LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_float = NULL;
1037    LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_float = NULL;
1038    LLVMValueRef x_stride, y_stride, z_stride;
1039    LLVMValueRef x_offset0, x_offset1;
1040    LLVMValueRef y_offset0, y_offset1;
1041    LLVMValueRef z_offset0, z_offset1;
1042    LLVMValueRef offset[2][2][2]; /* [z][y][x] */
1043    LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
1044    unsigned x, y, z;
1045
1046    lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));
1047
1048    lp_build_extract_image_sizes(bld,
1049                                 &bld->int_size_bld,
1050                                 bld->int_coord_type,
1051                                 int_size,
1052                                 &width_vec,
1053                                 &height_vec,
1054                                 &depth_vec);
1055
1056    s_float = s; t_float = t; r_float = r;
1057
1058    if (bld->static_sampler_state->normalized_coords) {
1059       LLVMValueRef scaled_size;
1060       LLVMValueRef flt_size;
1061
1062       /* scale size by 256 (8 fractional bits) */
1063       scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
1064
1065       flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
1066
1067       lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
1068    }
1069    else {
1070       /* scale coords by 256 (8 fractional bits) */
1071       s = lp_build_mul_imm(&bld->coord_bld, s, 256);
1072       if (dims >= 2)
1073          t = lp_build_mul_imm(&bld->coord_bld, t, 256);
1074       if (dims >= 3)
1075          r = lp_build_mul_imm(&bld->coord_bld, r, 256);
1076    }
1077
1078    /* convert float to int */
1079    /* For correct rounding, need round to nearest, not truncation here.
1080     * Note that in some cases (clamp to edge, no texel offsets) we
1081     * could use a non-signed build context which would help archs which
1082     * don't have fptosi intrinsic with nearest rounding implemented.
1083     */
1084    s = lp_build_iround(&bld->coord_bld, s);
1085    if (dims >= 2)
1086       t = lp_build_iround(&bld->coord_bld, t);
1087    if (dims >= 3)
1088       r = lp_build_iround(&bld->coord_bld, r);
1089
1090    /* subtract 0.5 (add -128) */
1091    i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
1092    if (!bld->static_sampler_state->force_nearest_s) {
1093       s = LLVMBuildAdd(builder, s, i32_c128, "");
1094    }
1095    if (dims >= 2 && !bld->static_sampler_state->force_nearest_t) {
1096       t = LLVMBuildAdd(builder, t, i32_c128, "");
1097    }
1098    if (dims >= 3) {
1099       r = LLVMBuildAdd(builder, r, i32_c128, "");
1100    }
1101
1102    /* compute floor (shift right 8) */
1103    i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
1104    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
1105    if (dims >= 2)
1106       t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
1107    if (dims >= 3)
1108       r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
1109
1110    /* add texel offsets */
1111    if (offsets[0]) {
1112       s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
1113       if (dims >= 2) {
1114          t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
1115          if (dims >= 3) {
1116             r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
1117          }
1118       }
1119    }
1120
1121    /* compute fractional part (AND with 0xff) */
1122    i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
1123    s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
1124    if (dims >= 2)
1125       t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
1126    if (dims >= 3)
1127       r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
1128
1129    /* get pixel, row and image strides */
1130    x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
1131                                  bld->format_desc->block.bits/8);
1132    y_stride = row_stride_vec;
1133    z_stride = img_stride_vec;
1134
1135    /* do texcoord wrapping and compute texel offsets */
1136    lp_build_sample_wrap_linear_int(bld,
1137                                    bld->format_desc->block.width,
1138                                    s_ipart, &s_fpart, s_float,
1139                                    width_vec, x_stride, offsets[0],
1140                                    bld->static_texture_state->pot_width,
1141                                    bld->static_sampler_state->wrap_s,
1142                                    &x_offset0, &x_offset1,
1143                                    &x_subcoord[0], &x_subcoord[1]);
1144
1145    /* add potential cube/array/mip offsets now as they are constant per pixel */
1146    if (has_layer_coord(bld->static_texture_state->target)) {
1147       LLVMValueRef z_offset;
1148       z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
1149       /* The r coord is the cube face in [0,5] or array layer */
1150       x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
1151       x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
1152    }
1153    if (mipoffsets) {
1154       x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
1155       x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
1156    }
1157
1158    for (z = 0; z < 2; z++) {
1159       for (y = 0; y < 2; y++) {
1160          offset[z][y][0] = x_offset0;
1161          offset[z][y][1] = x_offset1;
1162       }
1163    }
1164
1165    if (dims >= 2) {
1166       lp_build_sample_wrap_linear_int(bld,
1167                                       bld->format_desc->block.height,
1168                                       t_ipart, &t_fpart, t_float,
1169                                       height_vec, y_stride, offsets[1],
1170                                       bld->static_texture_state->pot_height,
1171                                       bld->static_sampler_state->wrap_t,
1172                                       &y_offset0, &y_offset1,
1173                                       &y_subcoord[0], &y_subcoord[1]);
1174
1175       for (z = 0; z < 2; z++) {
1176          for (x = 0; x < 2; x++) {
1177             offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
1178                                            offset[z][0][x], y_offset0);
1179             offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
1180                                            offset[z][1][x], y_offset1);
1181          }
1182       }
1183    }
1184
1185    if (dims >= 3) {
1186       lp_build_sample_wrap_linear_int(bld,
1187                                       1, /* block length (depth) */
1188                                       r_ipart, &r_fpart, r_float,
1189                                       depth_vec, z_stride, offsets[2],
1190                                       bld->static_texture_state->pot_depth,
1191                                       bld->static_sampler_state->wrap_r,
1192                                       &z_offset0, &z_offset1,
1193                                       &z_subcoord[0], &z_subcoord[1]);
1194       for (y = 0; y < 2; y++) {
1195          for (x = 0; x < 2; x++) {
1196             offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
1197                                            offset[0][y][x], z_offset0);
1198             offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
1199                                            offset[1][y][x], z_offset1);
1200          }
1201       }
1202    }
1203
1204    lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
1205                                       x_subcoord, y_subcoord,
1206                                       s_fpart, t_fpart, r_fpart,
1207                                       colors);
1208 }
1209
1210
1211 /**
1212  * Sample a single texture image with (bi-)(tri-)linear sampling.
1213  * Return filtered color as two vectors of 16-bit fixed point values.
1214  * Does address calcs (except offsets) with floats.
1215  * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
1216  */
1217 static void
1218 lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld,
1219                                     LLVMValueRef int_size,
1220                                     LLVMValueRef row_stride_vec,
1221                                     LLVMValueRef img_stride_vec,
1222                                     LLVMValueRef data_ptr,
1223                                     LLVMValueRef mipoffsets,
1224                                     LLVMValueRef s,
1225                                     LLVMValueRef t,
1226                                     LLVMValueRef r,
1227                                     const LLVMValueRef *offsets,
1228                                     LLVMValueRef *colors)
1229 {
1230    const unsigned dims = bld->dims;
1231    LLVMValueRef width_vec, height_vec, depth_vec;
1232    LLVMValueRef s_fpart;
1233    LLVMValueRef t_fpart = NULL;
1234    LLVMValueRef r_fpart = NULL;
1235    LLVMValueRef x_stride, y_stride, z_stride;
1236    LLVMValueRef x_offset0, x_offset1;
1237    LLVMValueRef y_offset0, y_offset1;
1238    LLVMValueRef z_offset0, z_offset1;
1239    LLVMValueRef offset[2][2][2]; /* [z][y][x] */
1240    LLVMValueRef x_subcoord[2], y_subcoord[2];
1241    LLVMValueRef flt_size;
1242    LLVMValueRef x_icoord0, x_icoord1;
1243    LLVMValueRef y_icoord0, y_icoord1;
1244    LLVMValueRef z_icoord0, z_icoord1;
1245    unsigned x, y, z;
1246
1247    flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);
1248
1249    lp_build_extract_image_sizes(bld,
1250                                 &bld->float_size_bld,
1251                                 bld->coord_type,
1252                                 flt_size,
1253                                 &width_vec,
1254                                 &height_vec,
1255                                 &depth_vec);
1256
1257    /* do texcoord wrapping and compute texel offsets */
1258    lp_build_sample_wrap_linear_float(bld,
1259                                      bld->format_desc->block.width,
1260                                      s, width_vec, offsets[0],
1261                                      bld->static_texture_state->pot_width,
1262                                      bld->static_sampler_state->wrap_s,
1263                                      &x_icoord0, &x_icoord1,
1264                                      &s_fpart,
1265                                      bld->static_sampler_state->force_nearest_s);
1266
1267    if (dims >= 2) {
1268       lp_build_sample_wrap_linear_float(bld,
1269                                         bld->format_desc->block.height,
1270                                         t, height_vec, offsets[1],
1271                                         bld->static_texture_state->pot_height,
1272                                         bld->static_sampler_state->wrap_t,
1273                                         &y_icoord0, &y_icoord1,
1274                                         &t_fpart,
1275                                         bld->static_sampler_state->force_nearest_t);
1276
1277       if (dims >= 3) {
1278          lp_build_sample_wrap_linear_float(bld,
1279                                            1, /* block length (depth) */
1280                                            r, depth_vec, offsets[2],
1281                                            bld->static_texture_state->pot_depth,
1282                                            bld->static_sampler_state->wrap_r,
1283                                            &z_icoord0, &z_icoord1,
1284                                            &r_fpart, 0);
1285       }
1286    }
1287
1288    /*
1289     * From here on we deal with ints, and we should split up the 256bit
1290     * vectors manually for better generated code.
1291     */
1292
1293    /* get pixel, row and image strides */
1294    x_stride = lp_build_const_vec(bld->gallivm,
1295                                  bld->int_coord_bld.type,
1296                                  bld->format_desc->block.bits/8);
1297    y_stride = row_stride_vec;
1298    z_stride = img_stride_vec;
1299
1300    /*
1301     * compute texel offset -
1302     * cannot do offset calc with floats, difficult for block-based formats,
1303     * and not enough precision anyway.
1304     */
1305    lp_build_sample_partial_offset(&bld->int_coord_bld,
1306                                   bld->format_desc->block.width,
1307                                   x_icoord0, x_stride,
1308                                   &x_offset0, &x_subcoord[0]);
1309    lp_build_sample_partial_offset(&bld->int_coord_bld,
1310                                   bld->format_desc->block.width,
1311                                   x_icoord1, x_stride,
1312                                   &x_offset1, &x_subcoord[1]);
1313
1314    /* add potential cube/array/mip offsets now as they are constant per pixel */
1315    if (has_layer_coord(bld->static_texture_state->target)) {
1316       LLVMValueRef z_offset;
1317       z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
1318       /* The r coord is the cube face in [0,5] or array layer */
1319       x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
1320       x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
1321    }
1322    if (mipoffsets) {
1323       x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
1324       x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
1325    }
1326
1327    for (z = 0; z < 2; z++) {
1328       for (y = 0; y < 2; y++) {
1329          offset[z][y][0] = x_offset0;
1330          offset[z][y][1] = x_offset1;
1331       }
1332    }
1333
1334    if (dims >= 2) {
1335       lp_build_sample_partial_offset(&bld->int_coord_bld,
1336                                      bld->format_desc->block.height,
1337                                      y_icoord0, y_stride,
1338                                      &y_offset0, &y_subcoord[0]);
1339       lp_build_sample_partial_offset(&bld->int_coord_bld,
1340                                      bld->format_desc->block.height,
1341                                      y_icoord1, y_stride,
1342                                      &y_offset1, &y_subcoord[1]);
1343       for (z = 0; z < 2; z++) {
1344          for (x = 0; x < 2; x++) {
1345             offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
1346                                            offset[z][0][x], y_offset0);
1347             offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
1348                                            offset[z][1][x], y_offset1);
1349          }
1350       }
1351    }
1352
1353    if (dims >= 3) {
1354       LLVMValueRef z_subcoord[2];
1355       lp_build_sample_partial_offset(&bld->int_coord_bld,
1356                                      1,
1357                                      z_icoord0, z_stride,
1358                                      &z_offset0, &z_subcoord[0]);
1359       lp_build_sample_partial_offset(&bld->int_coord_bld,
1360                                      1,
1361                                      z_icoord1, z_stride,
1362                                      &z_offset1, &z_subcoord[1]);
1363       for (y = 0; y < 2; y++) {
1364          for (x = 0; x < 2; x++) {
1365             offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
1366                                            offset[0][y][x], z_offset0);
1367             offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
1368                                            offset[1][y][x], z_offset1);
1369          }
1370       }
1371    }
1372
1373    lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
1374                                       x_subcoord, y_subcoord,
1375                                       s_fpart, t_fpart, r_fpart,
1376                                       colors);
1377 }
1378
1379
1380 /**
1381  * Sample the texture/mipmap using given image filter and mip filter.
1382  * ilevel0 and ilevel1 select the two mipmap levels to sample from; their
1383  * sizes, strides and data pointers/mip offsets are looked up in here.
1384  * If mip_filter is not PIPE_TEX_MIPFILTER_LINEAR, ilevel1/lod_fpart are unused.
      *
      * The colors of the first level are always stored to colors_var.  With
      * linear mip filtering, and only if the lod weight requires it, the second
      * level is sampled too and the blended result overwrites that store.
      *
      * @param bld         sampler build context
      * @param img_filter  PIPE_TEX_FILTER_NEAREST or PIPE_TEX_FILTER_LINEAR
      *                    (anything else trips the assert below)
      * @param mip_filter  only PIPE_TEX_MIPFILTER_LINEAR enables the second
      *                    level fetch and the lerp between the levels
      * @param s           coordinate, forwarded to the image sampling helpers
      * @param t           coordinate, forwarded to the image sampling helpers
      * @param r           coordinate, forwarded to the image sampling helpers
      * @param offsets     forwarded unchanged to the image sampling helpers
      * @param ilevel0     first mipmap level to sample
      * @param ilevel1     second mipmap level, only read for linear mip filter
      * @param lod_fpart   float lod weight, only read for linear mip filter;
      *                    it is scaled by 256 and converted to integer here
      * @param colors_var  pointer the resulting packed colors are stored to
1385  */
1386 static void
1387 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1388                        unsigned img_filter,
1389                        unsigned mip_filter,
1390                        LLVMValueRef s,
1391                        LLVMValueRef t,
1392                        LLVMValueRef r,
1393                        const LLVMValueRef *offsets,
1394                        LLVMValueRef ilevel0,
1395                        LLVMValueRef ilevel1,
1396                        LLVMValueRef lod_fpart,
1397                        LLVMValueRef colors_var)
1398 {
1399    LLVMBuilderRef builder = bld->gallivm->builder;
1400    LLVMValueRef size0;
1401    LLVMValueRef size1;
1402    LLVMValueRef row_stride0_vec = NULL;
1403    LLVMValueRef row_stride1_vec = NULL;
1404    LLVMValueRef img_stride0_vec = NULL;
1405    LLVMValueRef img_stride1_vec = NULL;
1406    LLVMValueRef data_ptr0;
1407    LLVMValueRef data_ptr1;
1408    LLVMValueRef mipoff0 = NULL;
1409    LLVMValueRef mipoff1 = NULL;
1410    LLVMValueRef colors0;
1411    LLVMValueRef colors1;
        /*
         * Pick the *_afloat image sampling helpers on CPUs which have AVX but
         * not AVX2, when the coord vectors have more than 4 elements.
         * NOTE(review): presumably because wide integer vector ops need AVX2
         * -- confirm.
         */
1412    boolean use_floats = util_cpu_caps.has_avx &&
1413                         !util_cpu_caps.has_avx2 &&
1414                         bld->coord_type.length > 4;
1415
1416    /* sample the first mipmap level */
1417    lp_build_mipmap_level_sizes(bld, ilevel0,
1418                                &size0,
1419                                &row_stride0_vec, &img_stride0_vec);
        /*
         * With a single mip the level's data pointer is fetched directly and
         * mipoff0 stays NULL.  Otherwise the base pointer is used and the
         * per-level offsets are handed to the sampling helper separately.
         */
1420    if (bld->num_mips == 1) {
1421       data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1422    }
1423    else {
1424       /* This path should work for num_lods 1 too but slightly less efficient */
1425       data_ptr0 = bld->base_ptr;
1426       mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1427    }
1428
1429    if (use_floats) {
1430       if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1431          lp_build_sample_image_nearest_afloat(bld,
1432                                               size0,
1433                                               row_stride0_vec, img_stride0_vec,
1434                                               data_ptr0, mipoff0, s, t, r, offsets,
1435                                               &colors0);
1436       }
1437       else {
1438          assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1439          lp_build_sample_image_linear_afloat(bld,
1440                                              size0,
1441                                              row_stride0_vec, img_stride0_vec,
1442                                              data_ptr0, mipoff0, s, t, r, offsets,
1443                                              &colors0);
1444       }
1445    }
1446    else {
1447       if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1448          lp_build_sample_image_nearest(bld,
1449                                        size0,
1450                                        row_stride0_vec, img_stride0_vec,
1451                                        data_ptr0, mipoff0, s, t, r, offsets,
1452                                        &colors0);
1453       }
1454       else {
1455          assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1456          lp_build_sample_image_linear(bld,
1457                                       size0,
1458                                       row_stride0_vec, img_stride0_vec,
1459                                       data_ptr0, mipoff0, s, t, r, offsets,
1460                                       &colors0);
1461       }
1462    }
1463
1464    /* Store the first level's colors in the output variables */
1465    LLVMBuildStore(builder, colors0, colors_var);
1466
1467    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1468       LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
1469                                                      bld->lodf_bld.type, 256.0);
1470       LLVMTypeRef i32vec_type = bld->lodi_bld.vec_type;
1471       struct lp_build_if_state if_ctx;
1472       LLVMValueRef need_lerp;
1473       unsigned num_quads = bld->coord_bld.type.length / 4;
1474       unsigned i;
1475
           /*
            * Turn the float lod weight into an integer weight by scaling with
            * 256; further below only its low 8 bits are kept (truncation to
            * the u8n element type).
            */
1476       lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16vec_scale, "");
1477       lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16");
1478
1479       /* need_lerp = lod_fpart > 0 */
1480       if (bld->num_lods == 1) {
1481          need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
1482                                    lod_fpart, bld->lodi_bld.zero,
1483                                    "need_lerp");
1484       }
1485       else {
1486          /*
1487           * We'll do mip filtering if any of the quads need it.
1488           * It might be better to split the vectors here and only fetch/filter
1489           * quads which need it.
1490           */
1491          /*
1492           * We need to clamp lod_fpart here since we can get negative
1493           * values which would screw up filtering if not all
1494           * lod_fpart values have same sign.
1495           * We can however then skip the greater than comparison.
1496           */
1497          lod_fpart = lp_build_max(&bld->lodi_bld, lod_fpart,
1498                                   bld->lodi_bld.zero);
1499          need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_fpart);
1500       }
1501
           /* Everything for the second level is emitted inside this conditional. */
1502       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1503       {
1504          struct lp_build_context u8n_bld;
1505
              /* 8-bit unorm builder context spanning bld->vector_width bits */
1506          lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));
1507
1508          /* sample the second mipmap level */
1509          lp_build_mipmap_level_sizes(bld, ilevel1,
1510                                      &size1,
1511                                      &row_stride1_vec, &img_stride1_vec);
1512          if (bld->num_mips == 1) {
1513             data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1514          }
1515          else {
1516             data_ptr1 = bld->base_ptr;
1517             mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1518          }
1519
              /*
               * Same helper selection as for the first level; img_filter has
               * already been checked by the asserts above.
               */
1520          if (use_floats) {
1521             if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1522                lp_build_sample_image_nearest_afloat(bld,
1523                                                     size1,
1524                                                     row_stride1_vec, img_stride1_vec,
1525                                                     data_ptr1, mipoff1, s, t, r, offsets,
1526                                                     &colors1);
1527             }
1528             else {
1529                lp_build_sample_image_linear_afloat(bld,
1530                                                    size1,
1531                                                    row_stride1_vec, img_stride1_vec,
1532                                                    data_ptr1, mipoff1, s, t, r, offsets,
1533                                                    &colors1);
1534             }
1535          }
1536          else {
1537             if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1538                lp_build_sample_image_nearest(bld,
1539                                              size1,
1540                                              row_stride1_vec, img_stride1_vec,
1541                                              data_ptr1, mipoff1, s, t, r, offsets,
1542                                              &colors1);
1543             }
1544             else {
1545                lp_build_sample_image_linear(bld,
1546                                             size1,
1547                                             row_stride1_vec, img_stride1_vec,
1548                                             data_ptr1, mipoff1, s, t, r, offsets,
1549                                             &colors1);
1550             }
1551          }
1552
1553          /* interpolate samples from the two mipmap levels */
1554
              /*
               * One quad and one lod: truncate the weight to the u8n element
               * type and broadcast it to every channel of the u8n vector.
               */
1555          if (num_quads == 1 && bld->num_lods == 1) {
1556             lod_fpart = LLVMBuildTrunc(builder, lod_fpart, u8n_bld.elem_type, "");
1557             lod_fpart = lp_build_broadcast_scalar(&u8n_bld, lod_fpart);
1558          }
1559          else {
1560             unsigned num_chans_per_lod = 4 * bld->coord_type.length / bld->num_lods;
1561             LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->lodi_bld.type.length);
1562             LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];
1563
1564             /* Take the LSB of lod_fpart */
1565             lod_fpart = LLVMBuildTrunc(builder, lod_fpart, tmp_vec_type, "");
1566
1567             /* Broadcast each lod weight into their respective channels */
                 /*
                  * Output channel i reads weight i / num_chans_per_lod, so each
                  * run of num_chans_per_lod consecutive channels shares one lod
                  * weight.
                  */
1568             for (i = 0; i < u8n_bld.type.length; ++i) {
1569                shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_lod);
1570             }
1571             lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, LLVMGetUndef(tmp_vec_type),
1572                                                LLVMConstVector(shuffle, u8n_bld.type.length), "");
1573          }
1574
              /*
               * NOTE(review): LP_BLD_LERP_PRESCALED_WEIGHTS presumably tells
               * lp_build_lerp() the weights are already in integer form --
               * confirm against its definition.
               */
1575          colors0 = lp_build_lerp(&u8n_bld, lod_fpart,
1576                                  colors0, colors1,
1577                                  LP_BLD_LERP_PRESCALED_WEIGHTS);
1578
              /* Replace the first level's colors with the blended result. */
1579          LLVMBuildStore(builder, colors0, colors_var);
1580       }
1581       lp_build_endif(&if_ctx);
1582    }
1583 }
1584
1585
1586
1587 /**
1588  * Texture sampling in AoS format.  Used when sampling common 32-bit/texel
1589  * formats.  1D/2D/3D/cube texture supported.  All mipmap sampling modes
1590  * but only limited texture coord wrap modes.
      *
      * The packed 8-bit unorm colors are written by lp_build_sample_mipmap()
      * to a variable created with lp_build_alloca(), loaded back, converted
      * to SoA and, for rgba8 variant formats, swizzled into texel_out.
      *
      * @param bld           sampler build context
      * @param sampler_unit  not referenced in this function
      * @param s             coordinate, forwarded to lp_build_sample_mipmap()
      * @param t             coordinate, forwarded to lp_build_sample_mipmap()
      * @param r             coordinate, forwarded to lp_build_sample_mipmap()
      * @param offsets       forwarded unchanged to lp_build_sample_mipmap()
      * @param lod_positive  only read if min and mag image filters differ;
      *                      selects the minification path when true
      * @param lod_fpart     lod weight, forwarded except on the mag path
      * @param ilevel0       first mipmap level
      * @param ilevel1       second mipmap level, forwarded except on the mag path
      * @param texel_out     receives the four resulting SoA channels
1591  */
1592 void
1593 lp_build_sample_aos(struct lp_build_sample_context *bld,
1594                     unsigned sampler_unit,
1595                     LLVMValueRef s,
1596                     LLVMValueRef t,
1597                     LLVMValueRef r,
1598                     const LLVMValueRef *offsets,
1599                     LLVMValueRef lod_positive,
1600                     LLVMValueRef lod_fpart,
1601                     LLVMValueRef ilevel0,
1602                     LLVMValueRef ilevel1,
1603                     LLVMValueRef texel_out[4])
1604 {
1605    LLVMBuilderRef builder = bld->gallivm->builder;
1606    const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
1607    const unsigned min_filter = bld->static_sampler_state->min_img_filter;
1608    const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
1609    const unsigned dims = bld->dims;
1610    LLVMValueRef packed_var, packed;
1611    LLVMValueRef unswizzled[4];
1612    struct lp_build_context u8n_bld;
1613
1614    /* we only support the common/simple wrap modes at this time */
1615    assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_s));
1616    if (dims >= 2)
1617       assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_t));
1618    if (dims >= 3)
1619       assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_r));
1620
1621
1622    /* make 8-bit unorm builder context */
1623    lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));
1624
1625    /*
1626     * Get/interpolate texture colors.
1627     */
1628
        /*
         * The result goes through a variable rather than a plain value since
         * it may be stored from either arm of the conditional below.
         */
1629    packed_var = lp_build_alloca(bld->gallivm, u8n_bld.vec_type, "packed_var");
1630
1631    if (min_filter == mag_filter) {
1632       /* no need to distinguish between minification and magnification */
1633       lp_build_sample_mipmap(bld,
1634                              min_filter, mip_filter,
1635                              s, t, r, offsets,
1636                              ilevel0, ilevel1, lod_fpart,
1637                              packed_var);
1638    }
1639    else {
1640       /* Emit conditional to choose min image filter or mag image filter
1641        * depending on the lod being > 0 or <= 0, respectively.
1642        */
1643       struct lp_build_if_state if_ctx;
1644
1645       /*
1646        * FIXME this should take all lods into account, if some are min
1647        * some max probably could hack up the weights in the linear
1648        * path with selects to work for nearest.
1649        */
           /* With several lods only element 0 decides the branch (see FIXME). */
1650       if (bld->num_lods > 1)
1651          lod_positive = LLVMBuildExtractElement(builder, lod_positive,
1652                                                 lp_build_const_int32(bld->gallivm, 0), "");
1653
           /* Reduce to a single i1 which is used as the branch condition. */
1654       lod_positive = LLVMBuildTrunc(builder, lod_positive,
1655                                     LLVMInt1TypeInContext(bld->gallivm->context), "");
1656
1657       lp_build_if(&if_ctx, bld->gallivm, lod_positive);
1658       {
1659          /* Use the minification filter */
1660          lp_build_sample_mipmap(bld,
1661                                 min_filter, mip_filter,
1662                                 s, t, r, offsets,
1663                                 ilevel0, ilevel1, lod_fpart,
1664                                 packed_var);
1665       }
1666       lp_build_else(&if_ctx);
1667       {
1668          /* Use the magnification filter */
              /*
               * No mip filtering here: with PIPE_TEX_MIPFILTER_NONE the callee
               * never reads ilevel1 or lod_fpart, so NULL is passed for both.
               */
1669          lp_build_sample_mipmap(bld,
1670                                 mag_filter, PIPE_TEX_MIPFILTER_NONE,
1671                                 s, t, r, offsets,
1672                                 ilevel0, NULL, NULL,
1673                                 packed_var);
1674       }
1675       lp_build_endif(&if_ctx);
1676    }
1677
1678    packed = LLVMBuildLoad(builder, packed_var, "");
1679
1680    /*
1681     * Convert to SoA and swizzle.
1682     */
1683    lp_build_rgba8_to_fi32_soa(bld->gallivm,
1684                              bld->texel_type,
1685                              packed, unswizzled);
1686
1687    if (util_format_is_rgba8_variant(bld->format_desc)) {
1688       lp_build_format_swizzle_soa(bld->format_desc,
1689                                   &bld->texel_bld,
1690                                   unswizzled, texel_out);
1691    }
1692    else {
           /* Not an rgba8 variant: hand the channels through without swizzling. */
1693       texel_out[0] = unswizzled[0];
1694       texel_out[1] = unswizzled[1];
1695       texel_out[2] = unswizzled[2];
1696       texel_out[3] = unswizzled[3];
1697    }
1698 }