/**************************************************************************
 *
 * Copyright 2010 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Texture sampling -- AoS.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 * @author Brian Paul <brianp@vmware.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_debug.h"
#include "util/u_dump.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_format.h"
#include "util/u_cpu_detect.h"
#include "lp_bld_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_pack.h"
#include "lp_bld_flow.h"
#include "lp_bld_gather.h"
#include "lp_bld_format.h"
#include "lp_bld_init.h"
#include "lp_bld_sample.h"
#include "lp_bld_sample_aos.h"
#include "lp_bld_quad.h"


/**
 * Build LLVM code for texture coord wrapping, for nearest filtering,
 * for scaled integer texcoords.
 * \param block_length  is the length of the pixel block along the
 *                      coordinate axis
 * \param coord  the incoming texcoord (s,t or r) scaled to the texture size
 * \param coord_f  the incoming texcoord (s,t or r) as float vec
 * \param length  the texture size along one dimension
 * \param stride  pixel stride along the coordinate axis (in bytes)
 * \param offset  the texel offset along the coord axis
 * \param is_pot  if TRUE, length is a power of two
 * \param wrap_mode  one of PIPE_TEX_WRAP_x
 * \param out_offset  byte offset for the wrapped coordinate
 * \param out_i  resulting sub-block pixel coordinate for coord0
 */
static void
lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
                                 unsigned block_length,
                                 LLVMValueRef coord,
                                 LLVMValueRef coord_f,
                                 LLVMValueRef length,
                                 LLVMValueRef stride,
                                 LLVMValueRef offset,
                                 boolean is_pot,
                                 unsigned wrap_mode,
                                 LLVMValueRef *out_offset,
                                 LLVMValueRef *out_i)
{
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one;

   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if(is_pot)
         coord = LLVMBuildAnd(builder, coord, length_minus_one, "");
      else {
         struct lp_build_context *coord_bld = &bld->coord_bld;
         LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length_f);
            coord_f = lp_build_add(coord_bld, coord_f, offset);
         }
         coord = lp_build_fract_safe(coord_bld, coord_f);
         coord = lp_build_mul(coord_bld, coord, length_f);
         coord = lp_build_itrunc(coord_bld, coord);
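         /* e.g. coord_f = -0.25 with length 8: fract_safe gives 0.75,
          * scaled to 6.0 and truncated to texel 6 */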
      }
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
      coord = lp_build_min(int_coord_bld, coord, length_minus_one);
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      assert(0);
   }

   lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
                                  out_offset, out_i);
}


/**
 * Build LLVM code for texture coord wrapping, for nearest filtering,
 * for float texcoords.
 * \param coord  the incoming texcoord (s,t or r)
 * \param length  the texture size along one dimension
 * \param offset  the texel offset along the coord axis
 * \param is_pot  if TRUE, length is a power of two
 * \param wrap_mode  one of PIPE_TEX_WRAP_x
 * \param icoord  the texcoord after wrapping, as int
 */
static void
lp_build_sample_wrap_nearest_float(struct lp_build_sample_context *bld,
                                   LLVMValueRef coord,
                                   LLVMValueRef length,
                                   LLVMValueRef offset,
                                   boolean is_pot,
                                   unsigned wrap_mode,
                                   LLVMValueRef *icoord)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   LLVMValueRef length_minus_one;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (offset) {
         /* this is definitely not ideal for POT case */
         offset = lp_build_int_to_float(coord_bld, offset);
         offset = lp_build_div(coord_bld, offset, length);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* take fraction, unnormalize */
      coord = lp_build_fract_safe(coord_bld, coord);
      coord = lp_build_mul(coord_bld, coord, length);
      *icoord = lp_build_itrunc(coord_bld, coord);
      break;
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      coord = lp_build_clamp(coord_bld, coord, coord_bld->zero,
                             length_minus_one);
      *icoord = lp_build_itrunc(coord_bld, coord);
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      assert(0);
   }
}


/**
 * Helper to compute the first coord and the weight for
 * linear wrap repeat npot textures
 */
static void
lp_build_coord_repeat_npot_linear_int(struct lp_build_sample_context *bld,
                                      LLVMValueRef coord_f,
                                      LLVMValueRef length_i,
                                      LLVMValueRef length_f,
                                      LLVMValueRef *coord0_i,
                                      LLVMValueRef *weight_i)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   struct lp_build_context abs_coord_bld;
   struct lp_type abs_type;
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
                                                int_coord_bld->one);
   LLVMValueRef mask, i32_c8, i32_c128, i32_c255;

   /* wrap with normalized floats is just fract */
   coord_f = lp_build_fract(coord_bld, coord_f);
   /* mul by size */
   coord_f = lp_build_mul(coord_bld, coord_f, length_f);
   /* convert to int, compute lerp weight */
   coord_f = lp_build_mul_imm(&bld->coord_bld, coord_f, 256);

   /* At this point we don't have any negative numbers so use non-signed
    * build context which might help on some archs.
    */
   abs_type = coord_bld->type;
   abs_type.sign = 0;
   lp_build_context_init(&abs_coord_bld, bld->gallivm, abs_type);
   *coord0_i = lp_build_iround(&abs_coord_bld, coord_f);

   /* subtract 0.5 (add -128) */
   i32_c128 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, -128);
   *coord0_i = LLVMBuildAdd(bld->gallivm->builder, *coord0_i, i32_c128, "");

   /* compute fractional part (AND with 0xff) */
   i32_c255 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 255);
   *weight_i = LLVMBuildAnd(bld->gallivm->builder, *coord0_i, i32_c255, "");

   /* compute floor (shift right 8) */
   i32_c8 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 8);
   *coord0_i = LLVMBuildAShr(bld->gallivm->builder, *coord0_i, i32_c8, "");
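   /*
    * Worked example of the 8.8 fixed point trick: for coord_f = 0.3 and
    * length 8, fract * size = 2.4 scales to 614; adding -128 gives 486;
    * 486 & 0xff = 230 is the lerp weight (~0.9) and 486 >> 8 = 1 is the
    * first texel, i.e. floor(2.4 - 0.5) with weight fract(2.4 - 0.5).
    */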
   /*
    * we avoided the 0.5/length division before the repeat wrap,
    * now need to fix up edge cases with selects
    */
   mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                           PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
   *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
}


/**
 * Build LLVM code for texture coord wrapping, for linear filtering,
 * for scaled integer texcoords.
 * \param block_length  is the length of the pixel block along the
 *                      coordinate axis
 * \param coord0  the incoming texcoord (s,t or r) scaled to the texture size
 * \param coord_f  the incoming texcoord (s,t or r) as float vec
 * \param length  the texture size along one dimension
 * \param stride  pixel stride along the coordinate axis (in bytes)
 * \param offset  the texel offset along the coord axis
 * \param is_pot  if TRUE, length is a power of two
 * \param wrap_mode  one of PIPE_TEX_WRAP_x
 * \param offset0  resulting relative offset for coord0
 * \param offset1  resulting relative offset for coord0 + 1
 * \param i0  resulting sub-block pixel coordinate for coord0
 * \param i1  resulting sub-block pixel coordinate for coord0 + 1
 */
static void
lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
                                unsigned block_length,
                                LLVMValueRef coord0,
                                LLVMValueRef *weight_i,
                                LLVMValueRef coord_f,
                                LLVMValueRef length,
                                LLVMValueRef stride,
                                LLVMValueRef offset,
                                boolean is_pot,
                                unsigned wrap_mode,
                                LLVMValueRef *offset0,
                                LLVMValueRef *offset1,
                                LLVMValueRef *i0,
                                LLVMValueRef *i1)
{
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one;
   LLVMValueRef lmask, umask, mask;

   /*
    * If the pixel block covers more than one pixel then there is no easy
    * way to calculate offset1 relative to offset0. Instead, compute them
    * independently. Otherwise, try to compute offset0 and offset1 with
    * a single stride multiplication.
    */

   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);

   if (block_length != 1) {
      LLVMValueRef coord1;
      switch(wrap_mode) {
      case PIPE_TEX_WRAP_REPEAT:
         if (is_pot) {
            coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
            coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
            coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
         }
         else {
            LLVMValueRef mask;
            LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
            if (offset) {
               offset = lp_build_int_to_float(&bld->coord_bld, offset);
               offset = lp_build_div(&bld->coord_bld, offset, length_f);
               coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
            }
            lp_build_coord_repeat_npot_linear_int(bld, coord_f,
                                                  length, length_f,
                                                  &coord0, weight_i);
            mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
                                    PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
            coord1 = LLVMBuildAnd(builder,
                                  lp_build_add(int_coord_bld, coord0,
                                               int_coord_bld->one),
                                  mask, "");
         }
         break;

      case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         coord0 = lp_build_clamp(int_coord_bld, coord0, int_coord_bld->zero,
                                 length_minus_one);
         coord1 = lp_build_clamp(int_coord_bld, coord1, int_coord_bld->zero,
                                 length_minus_one);
         break;

      case PIPE_TEX_WRAP_CLAMP:
      case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      case PIPE_TEX_WRAP_MIRROR_REPEAT:
      case PIPE_TEX_WRAP_MIRROR_CLAMP:
      case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      default:
         assert(0);
         coord0 = int_coord_bld->zero;
         coord1 = int_coord_bld->zero;
         break;
      }
      lp_build_sample_partial_offset(int_coord_bld, block_length, coord0, stride,
                                     offset0, i0);
      lp_build_sample_partial_offset(int_coord_bld, block_length, coord1, stride,
                                     offset1, i1);
      return;
   }

   *i0 = int_coord_bld->zero;
   *i1 = int_coord_bld->zero;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
      }
      else {
         LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
         if (offset) {
            offset = lp_build_int_to_float(&bld->coord_bld, offset);
            offset = lp_build_div(&bld->coord_bld, offset, length_f);
            coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
         }
         lp_build_coord_repeat_npot_linear_int(bld, coord_f,
                                               length, length_f,
                                               &coord0, weight_i);
      }

      mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
                              PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);

      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
      *offset1 = LLVMBuildAnd(builder,
                              lp_build_add(int_coord_bld, *offset0, stride),
                              mask, "");
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      /* XXX this might be slower than the separate path
       * on some newer cpus. With sse41 this is 8 instructions vs. 7
       * - at least on SNB this is almost certainly slower since
       * min/max are cheaper than selects, and the muls aren't bad.
       */
      lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                               PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
      umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                               PIPE_FUNC_LESS, coord0, length_minus_one);

      coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
      coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);

      mask = LLVMBuildAnd(builder, lmask, umask, "");

      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
      *offset1 = lp_build_add(int_coord_bld,
                              *offset0,
                              LLVMBuildAnd(builder, stride, mask, ""));
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      assert(0);
      *offset0 = int_coord_bld->zero;
      *offset1 = int_coord_bld->zero;
      break;
   }
}


/**
 * Build LLVM code for texture coord wrapping, for linear filtering,
 * for float texcoords.
 * \param block_length  is the length of the pixel block along the
 *                      coordinate axis
 * \param coord  the incoming texcoord (s,t or r)
 * \param length  the texture size along one dimension
 * \param offset  the texel offset along the coord axis
 * \param is_pot  if TRUE, length is a power of two
 * \param wrap_mode  one of PIPE_TEX_WRAP_x
 * \param coord0  the first texcoord after wrapping, as int
 * \param coord1  the second texcoord after wrapping, as int
 * \param weight  the filter weight as int (0-255)
 * \param force_nearest  if this coord actually uses nearest filtering
 */
static void
lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld,
                                  unsigned block_length,
                                  LLVMValueRef coord,
                                  LLVMValueRef length,
                                  LLVMValueRef offset,
                                  boolean is_pot,
                                  unsigned wrap_mode,
                                  LLVMValueRef *coord0,
                                  LLVMValueRef *coord1,
                                  LLVMValueRef *weight,
                                  unsigned force_nearest)
{
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
   LLVMValueRef length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         /* mul by size and subtract 0.5 */
         coord = lp_build_mul(coord_bld, coord, length);
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         if (!force_nearest)
            coord = lp_build_sub(coord_bld, coord, half);
         *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
         *coord1 = lp_build_ifloor(coord_bld, *coord1);
         /* repeat wrap */
         length_minus_one = lp_build_itrunc(coord_bld, length_minus_one);
         *coord0 = LLVMBuildAnd(builder, *coord0, length_minus_one, "");
         *coord1 = LLVMBuildAnd(builder, *coord1, length_minus_one, "");
      }
      else {
         LLVMValueRef mask;
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* wrap with normalized floats is just fract */
         coord = lp_build_fract(coord_bld, coord);
         /* unnormalize */
         coord = lp_build_mul(coord_bld, coord, length);
         /*
          * we avoided the 0.5/length division, have to fix up wrong
          * edge cases with selects
          */
         *coord1 = lp_build_add(coord_bld, coord, half);
         coord = lp_build_sub(coord_bld, coord, half);
         *weight = lp_build_fract(coord_bld, coord);
         mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
                                 PIPE_FUNC_LESS, coord, coord_bld->zero);
         *coord0 = lp_build_select(coord_bld, mask, length_minus_one, coord);
         *coord0 = lp_build_itrunc(coord_bld, *coord0);
         mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
                                 PIPE_FUNC_LESS, *coord1, length);
         *coord1 = lp_build_select(coord_bld, mask, *coord1, coord_bld->zero);
         *coord1 = lp_build_itrunc(coord_bld, *coord1);
      }
      break;
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* mul by tex size */
         coord = lp_build_mul(coord_bld, coord, length);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* subtract 0.5 */
      if (!force_nearest) {
         coord = lp_build_sub(coord_bld, coord, half);
      }
      /* clamp to [0, length - 1] */
      coord = lp_build_min(coord_bld, coord, length_minus_one);
      coord = lp_build_max(coord_bld, coord, coord_bld->zero);
      *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
      /* coord1 = min(coord1, length-1) */
      *coord1 = lp_build_min(coord_bld, *coord1, length_minus_one);
      *coord1 = lp_build_itrunc(coord_bld, *coord1);
      break;
   default:
      assert(0);
      *coord0 = int_coord_bld->zero;
      *coord1 = int_coord_bld->zero;
      *weight = coord_bld->zero;
      break;
   }
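   /* convert the [0,1) float weight to 8-bit fixed point in [0,255],
    * as expected by the prescaled-weight lerp helpers */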
   *weight = lp_build_mul_imm(coord_bld, *weight, 256);
   *weight = lp_build_itrunc(coord_bld, *weight);
   return;
}


/**
 * Fetch texels for image with nearest sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 */
static void
lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
                                    LLVMValueRef data_ptr,
                                    LLVMValueRef offset,
                                    LLVMValueRef x_subcoord,
                                    LLVMValueRef y_subcoord,
                                    LLVMValueRef *colors)
{
   /*
    * Fetch the pixels as 4 x 32bit (rgba order might differ):
    *
    *   rgba0 rgba1 rgba2 rgba3
    *
    * bit cast them into 16 x u8
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    *
    * unpack them into two 8 x i16:
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1
    *   r2 g2 b2 a2 r3 g3 b3 a3
    *
    * The higher 8 bits of the resulting elements will be zero.
    */
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef rgba8;
   struct lp_build_context u8n;
   LLVMTypeRef u8n_vec_type;

   lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
   u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);

   if (util_format_is_rgba8_variant(bld->format_desc)) {
      /*
       * Given the format is a rgba8, just read the pixels as is,
       * without any swizzling. Swizzling will be done later.
       */
      rgba8 = lp_build_gather(bld->gallivm,
                              bld->texel_type.length,
                              bld->format_desc->block.bits,
                              bld->texel_type.width,
                              TRUE,
                              data_ptr, offset, TRUE);

      rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
   }
   else {
      rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
                                      bld->format_desc,
                                      u8n.type,
                                      TRUE,
                                      data_ptr, offset,
                                      x_subcoord,
                                      y_subcoord);
   }

   *colors = rgba8;
}


/**
 * Sample a single texture image with nearest sampling.
 * If sampling a cube texture, r = cube face in [0,5].
 * Return filtered color as two vectors of 16-bit fixed point values.
 */
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                              LLVMValueRef int_size,
                              LLVMValueRef row_stride_vec,
                              LLVMValueRef img_stride_vec,
                              LLVMValueRef data_ptr,
                              LLVMValueRef mipoffsets,
                              LLVMValueRef s,
                              LLVMValueRef t,
                              LLVMValueRef r,
                              const LLVMValueRef *offsets,
                              LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   struct lp_build_context i32;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL;
   LLVMValueRef s_float, t_float = NULL, r_float = NULL;
   LLVMValueRef x_stride;
   LLVMValueRef x_offset, offset;
   LLVMValueRef x_subcoord, y_subcoord, z_subcoord;

   lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));

   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                int_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   s_float = s; t_float = t; r_float = r;

   if (bld->static_sampler_state->normalized_coords) {
      LLVMValueRef flt_size;

      flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);

      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
   }

   /* convert float to int */
   /* For correct rounding, need floor, not truncation here.
    * Note that in some cases (clamp to edge, no texel offsets) we
    * could use a non-signed build context, which would greatly help
    * archs that lack native rounding support.
    */
   s_ipart = lp_build_ifloor(&bld->coord_bld, s);
   if (dims >= 2)
      t_ipart = lp_build_ifloor(&bld->coord_bld, t);
   if (dims >= 3)
      r_ipart = lp_build_ifloor(&bld->coord_bld, r);

   /* add texel offsets */
   if (offsets[0]) {
      s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
      if (dims >= 2) {
         t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
         if (dims >= 3) {
            r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
         }
      }
   }

   /* get pixel, row, image strides */
   x_stride = lp_build_const_vec(bld->gallivm,
                                 bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);

   /* Do texcoord wrapping, compute texel offset */
   lp_build_sample_wrap_nearest_int(bld,
                                    bld->format_desc->block.width,
                                    s_ipart, s_float,
                                    width_vec, x_stride, offsets[0],
                                    bld->static_texture_state->pot_width,
                                    bld->static_sampler_state->wrap_s,
                                    &x_offset, &x_subcoord);
   offset = x_offset;
   if (dims >= 2) {
      LLVMValueRef y_offset;
      lp_build_sample_wrap_nearest_int(bld,
                                       bld->format_desc->block.height,
                                       t_ipart, t_float,
                                       height_vec, row_stride_vec, offsets[1],
                                       bld->static_texture_state->pot_height,
                                       bld->static_sampler_state->wrap_t,
                                       &y_offset, &y_subcoord);
      offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
      if (dims >= 3) {
         LLVMValueRef z_offset;
         lp_build_sample_wrap_nearest_int(bld,
                                          1, /* block length (depth) */
                                          r_ipart, r_float,
                                          depth_vec, img_stride_vec, offsets[2],
                                          bld->static_texture_state->pot_depth,
                                          bld->static_sampler_state->wrap_r,
                                          &z_offset, &z_subcoord);
         offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
      }
   }
   if (has_layer_coord(bld->static_texture_state->target)) {
      LLVMValueRef z_offset;
      /* The r coord is the cube face in [0,5] or array layer */
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
   }
   if (mipoffsets) {
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
   }

   lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
                                       x_subcoord, y_subcoord,
                                       colors);
}


/**
 * Sample a single texture image with nearest sampling.
 * If sampling a cube texture, r = cube face in [0,5].
 * Return filtered color as two vectors of 16-bit fixed point values.
 * Does address calcs (except offsets) with floats.
 * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
 */
static void
lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld,
                                     LLVMValueRef int_size,
                                     LLVMValueRef row_stride_vec,
                                     LLVMValueRef img_stride_vec,
                                     LLVMValueRef data_ptr,
                                     LLVMValueRef mipoffsets,
                                     LLVMValueRef s,
                                     LLVMValueRef t,
                                     LLVMValueRef r,
                                     const LLVMValueRef *offsets,
                                     LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef offset;
   LLVMValueRef x_subcoord, y_subcoord;
   LLVMValueRef x_icoord = NULL, y_icoord = NULL, z_icoord = NULL;
   LLVMValueRef flt_size;

   flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* Do texcoord wrapping */
   lp_build_sample_wrap_nearest_float(bld,
                                      s, width_vec, offsets[0],
                                      bld->static_texture_state->pot_width,
                                      bld->static_sampler_state->wrap_s,
                                      &x_icoord);

   if (dims >= 2) {
      lp_build_sample_wrap_nearest_float(bld,
                                         t, height_vec, offsets[1],
                                         bld->static_texture_state->pot_height,
                                         bld->static_sampler_state->wrap_t,
                                         &y_icoord);

      if (dims >= 3) {
         lp_build_sample_wrap_nearest_float(bld,
                                            r, depth_vec, offsets[2],
                                            bld->static_texture_state->pot_depth,
                                            bld->static_sampler_state->wrap_r,
                                            &z_icoord);
      }
   }
   if (has_layer_coord(bld->static_texture_state->target)) {
      z_icoord = r;
   }

   /*
    * From here on we deal with ints, and we should split up the 256bit
    * vectors manually for better generated code.
    */

   /*
    * compute texel offsets -
    * cannot do offset calc with floats, difficult for block-based formats,
    * and not enough precision anyway.
    */
   lp_build_sample_offset(&bld->int_coord_bld,
                          bld->format_desc,
                          x_icoord, y_icoord,
                          z_icoord,
                          row_stride_vec, img_stride_vec,
                          &offset,
                          &x_subcoord, &y_subcoord);
   if (mipoffsets) {
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
   }

   lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
                                       x_subcoord, y_subcoord,
                                       colors);
}


/**
 * Fetch texels for image with linear sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 */
static void
lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
                                   LLVMValueRef data_ptr,
                                   LLVMValueRef offset[2][2][2],
                                   LLVMValueRef x_subcoord[2],
                                   LLVMValueRef y_subcoord[2],
                                   LLVMValueRef s_fpart,
                                   LLVMValueRef t_fpart,
                                   LLVMValueRef r_fpart,
                                   LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context u8n;
   LLVMTypeRef u8n_vec_type;
   LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
   LLVMValueRef shuffle;
   LLVMValueRef neighbors[2][2][2]; /* [z][y][x] */
   LLVMValueRef packed;
   unsigned i, j, k;
   unsigned numj, numk;

   lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
   u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);

   /*
    * Transform 4 x i32 in
    *
    *   s_fpart = {s0, s1, s2, s3}
    *
    * where each value is between 0 and 0xff,
    *
    * into one 16 x u8
    *
    *   s_fpart = {s0, s0, s0, s0, s1, s1, s1, s1, s2, s2, s2, s2, s3, s3, s3, s3}
    *
    * and likewise for t_fpart. There is no risk of losing precision here
    * since the fractional parts only use the lower 8 bits.
    */
   s_fpart = LLVMBuildBitCast(builder, s_fpart, u8n_vec_type, "");
   if (dims >= 2)
      t_fpart = LLVMBuildBitCast(builder, t_fpart, u8n_vec_type, "");
   if (dims >= 3)
      r_fpart = LLVMBuildBitCast(builder, r_fpart, u8n_vec_type, "");

   for (j = 0; j < u8n.type.length; j += 4) {
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      unsigned subindex = 0;
#else
      unsigned subindex = 3;
#endif
      LLVMValueRef index;

      index = LLVMConstInt(elem_type, j + subindex, 0);
      for (i = 0; i < 4; ++i)
         shuffles[j + i] = index;
   }
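   /*
    * E.g. for a 16 x u8 vector on little endian this builds the shuffle
    * {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}, i.e. the low byte of each
    * original i32 lane replicated across its four channels.
    */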

   shuffle = LLVMConstVector(shuffles, u8n.type.length);

   s_fpart = LLVMBuildShuffleVector(builder, s_fpart, u8n.undef,
                                    shuffle, "");
   if (dims >= 2) {
      t_fpart = LLVMBuildShuffleVector(builder, t_fpart, u8n.undef,
                                       shuffle, "");
   }
   if (dims >= 3) {
      r_fpart = LLVMBuildShuffleVector(builder, r_fpart, u8n.undef,
                                       shuffle, "");
   }

   /*
    * Fetch the pixels as 4 x 32bit (rgba order might differ):
    *
    *   rgba0 rgba1 rgba2 rgba3
    *
    * bit cast them into 16 x u8
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    *
    * unpack them into two 8 x i16:
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1
    *   r2 g2 b2 a2 r3 g3 b3 a3
    *
    * The higher 8 bits of the resulting elements will be zero.
    */
   numj = 1 + (dims >= 2);
   numk = 1 + (dims >= 3);
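   /* fetch two texels along each axis that actually gets linear filtering
    * (numj rows, numk slices), one otherwise */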

   for (k = 0; k < numk; k++) {
      for (j = 0; j < numj; j++) {
         for (i = 0; i < 2; i++) {
            LLVMValueRef rgba8;

            if (util_format_is_rgba8_variant(bld->format_desc)) {
               /*
                * Given the format is a rgba8, just read the pixels as is,
                * without any swizzling. Swizzling will be done later.
                */
               rgba8 = lp_build_gather(bld->gallivm,
                                       bld->texel_type.length,
                                       bld->format_desc->block.bits,
                                       bld->texel_type.width,
                                       TRUE,
                                       data_ptr, offset[k][j][i], TRUE);

               rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
            }
            else {
               rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
                                               bld->format_desc,
                                               u8n.type,
                                               TRUE,
                                               data_ptr, offset[k][j][i],
                                               x_subcoord[i],
                                               y_subcoord[j]);
            }

            neighbors[k][j][i] = rgba8;
         }
      }
   }

   /*
    * Linear interpolation with 8.8 fixed point.
    */
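   /*
    * Conceptually each 8-bit channel is blended as roughly
    *   result = v0 + ((w * (v1 - v0)) >> 8)   with w in [0,255];
    * LP_BLD_LERP_PRESCALED_WEIGHTS indicates the weights are already
    * in this fixed point range rather than normalized floats.
    */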
   if (bld->static_sampler_state->force_nearest_s) {
      /* special case 1-D lerp */
      packed = lp_build_lerp(&u8n,
                             t_fpart,
                             neighbors[0][0][0],
                             neighbors[0][0][1],
                             LP_BLD_LERP_PRESCALED_WEIGHTS);
   }
   else if (bld->static_sampler_state->force_nearest_t) {
      /* special case 1-D lerp */
      packed = lp_build_lerp(&u8n,
                             s_fpart,
                             neighbors[0][0][0],
                             neighbors[0][0][1],
                             LP_BLD_LERP_PRESCALED_WEIGHTS);
   }
   else {
      /* general 1/2/3-D lerping */
      if (dims == 1) {
         packed = lp_build_lerp(&u8n,
                                s_fpart,
                                neighbors[0][0][0],
                                neighbors[0][0][1],
                                LP_BLD_LERP_PRESCALED_WEIGHTS);
      } else if (dims == 2) {
         /* 2-D lerp */
         packed = lp_build_lerp_2d(&u8n,
                                   s_fpart, t_fpart,
                                   neighbors[0][0][0],
                                   neighbors[0][0][1],
                                   neighbors[0][1][0],
                                   neighbors[0][1][1],
                                   LP_BLD_LERP_PRESCALED_WEIGHTS);
      } else {
         /* 3-D lerp */
         assert(dims == 3);
         packed = lp_build_lerp_3d(&u8n,
                                   s_fpart, t_fpart, r_fpart,
                                   neighbors[0][0][0],
                                   neighbors[0][0][1],
                                   neighbors[0][1][0],
                                   neighbors[0][1][1],
                                   neighbors[1][0][0],
                                   neighbors[1][0][1],
                                   neighbors[1][1][0],
                                   neighbors[1][1][1],
                                   LP_BLD_LERP_PRESCALED_WEIGHTS);
      }
   }

   *colors = packed;
}

/**
 * Sample a single texture image with (bi-)(tri-)linear sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 */
static void
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                             LLVMValueRef int_size,
                             LLVMValueRef row_stride_vec,
                             LLVMValueRef img_stride_vec,
                             LLVMValueRef data_ptr,
                             LLVMValueRef mipoffsets,
                             LLVMValueRef s,
                             LLVMValueRef t,
                             LLVMValueRef r,
                             const LLVMValueRef *offsets,
                             LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context i32;
   LLVMValueRef i32_c8, i32_c128, i32_c255;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_ipart, s_fpart, s_float;
   LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_float = NULL;
   LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_float = NULL;
   LLVMValueRef x_stride, y_stride, z_stride;
   LLVMValueRef x_offset0, x_offset1;
   LLVMValueRef y_offset0, y_offset1;
   LLVMValueRef z_offset0, z_offset1;
   LLVMValueRef offset[2][2][2]; /* [z][y][x] */
   LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
   unsigned x, y, z;

   lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));

   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                int_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   s_float = s; t_float = t; r_float = r;

   if (bld->static_sampler_state->normalized_coords) {
      LLVMValueRef scaled_size;
      LLVMValueRef flt_size;

      /* scale size by 256 (8 fractional bits) */
      scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);

      flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);

      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
   }
   else {
      /* scale coords by 256 (8 fractional bits) */
      s = lp_build_mul_imm(&bld->coord_bld, s, 256);
      if (dims >= 2)
         t = lp_build_mul_imm(&bld->coord_bld, t, 256);
      if (dims >= 3)
         r = lp_build_mul_imm(&bld->coord_bld, r, 256);
   }

   /* convert float to int */
   /* For correct rounding, need round to nearest, not truncation here.
    * Note that in some cases (clamp to edge, no texel offsets) we
    * could use a non-signed build context, which would help archs that
    * don't have an fptosi intrinsic with nearest rounding implemented.
    */
   s = lp_build_iround(&bld->coord_bld, s);
   if (dims >= 2)
      t = lp_build_iround(&bld->coord_bld, t);
   if (dims >= 3)
      r = lp_build_iround(&bld->coord_bld, r);

   /* subtract 0.5 (add -128) */
   i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
   if (!bld->static_sampler_state->force_nearest_s) {
      s = LLVMBuildAdd(builder, s, i32_c128, "");
   }
   if (dims >= 2 && !bld->static_sampler_state->force_nearest_t) {
      t = LLVMBuildAdd(builder, t, i32_c128, "");
   }
   if (dims >= 3) {
      r = LLVMBuildAdd(builder, r, i32_c128, "");
   }

   /* compute floor (shift right 8) */
   i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   if (dims >= 2)
      t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
   if (dims >= 3)
      r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");

   /* add texel offsets */
   if (offsets[0]) {
      s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
      if (dims >= 2) {
         t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
         if (dims >= 3) {
            r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
         }
      }
   }

   /* compute fractional part (AND with 0xff) */
   i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
   s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
   if (dims >= 2)
      t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
   if (dims >= 3)
      r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
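   /* each fpart is now the low 8 bits of the scaled coord, i.e. the
    * bilinear lerp weight in [0,255] (8.8 fixed point) */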

   /* get pixel, row and image strides */
   x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);
   y_stride = row_stride_vec;
   z_stride = img_stride_vec;

   /* do texcoord wrapping and compute texel offsets */
   lp_build_sample_wrap_linear_int(bld,
                                   bld->format_desc->block.width,
                                   s_ipart, &s_fpart, s_float,
                                   width_vec, x_stride, offsets[0],
                                   bld->static_texture_state->pot_width,
                                   bld->static_sampler_state->wrap_s,
                                   &x_offset0, &x_offset1,
                                   &x_subcoord[0], &x_subcoord[1]);

   /* add potential cube/array/mip offsets now as they are constant per pixel */
   if (has_layer_coord(bld->static_texture_state->target)) {
      LLVMValueRef z_offset;
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      /* The r coord is the cube face in [0,5] or array layer */
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
   }
   if (mipoffsets) {
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
   }

   for (z = 0; z < 2; z++) {
      for (y = 0; y < 2; y++) {
         offset[z][y][0] = x_offset0;
         offset[z][y][1] = x_offset1;
      }
   }
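   /* offset[z][y][x] holds only the x (plus layer/mip) contribution so
    * far; the y and z stride terms are accumulated below */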

   if (dims >= 2) {
      lp_build_sample_wrap_linear_int(bld,
                                      bld->format_desc->block.height,
                                      t_ipart, &t_fpart, t_float,
                                      height_vec, y_stride, offsets[1],
                                      bld->static_texture_state->pot_height,
                                      bld->static_sampler_state->wrap_t,
                                      &y_offset0, &y_offset1,
                                      &y_subcoord[0], &y_subcoord[1]);

      for (z = 0; z < 2; z++) {
         for (x = 0; x < 2; x++) {
            offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][0][x], y_offset0);
            offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][1][x], y_offset1);
         }
      }
   }

   if (dims >= 3) {
      lp_build_sample_wrap_linear_int(bld,
                                      1, /* block length (depth) */
                                      r_ipart, &r_fpart, r_float,
                                      depth_vec, z_stride, offsets[2],
                                      bld->static_texture_state->pot_depth,
                                      bld->static_sampler_state->wrap_r,
                                      &z_offset0, &z_offset1,
                                      &z_subcoord[0], &z_subcoord[1]);
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[0][y][x], z_offset0);
            offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[1][y][x], z_offset1);
         }
      }
   }

   lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
                                      x_subcoord, y_subcoord,
                                      s_fpart, t_fpart, r_fpart,
                                      colors);
}


/**
 * Sample a single texture image with (bi-)(tri-)linear sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 * Does address calcs (except offsets) with floats.
 * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
 */
static void
lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld,
                                    LLVMValueRef int_size,
                                    LLVMValueRef row_stride_vec,
                                    LLVMValueRef img_stride_vec,
                                    LLVMValueRef data_ptr,
                                    LLVMValueRef mipoffsets,
                                    LLVMValueRef s,
                                    LLVMValueRef t,
                                    LLVMValueRef r,
                                    const LLVMValueRef *offsets,
                                    LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_fpart;
   LLVMValueRef t_fpart = NULL;
   LLVMValueRef r_fpart = NULL;
   LLVMValueRef x_stride, y_stride, z_stride;
   LLVMValueRef x_offset0, x_offset1;
   LLVMValueRef y_offset0, y_offset1;
   LLVMValueRef z_offset0, z_offset1;
   LLVMValueRef offset[2][2][2]; /* [z][y][x] */
   LLVMValueRef x_subcoord[2], y_subcoord[2];
   LLVMValueRef flt_size;
   LLVMValueRef x_icoord0, x_icoord1;
   LLVMValueRef y_icoord0, y_icoord1;
   LLVMValueRef z_icoord0, z_icoord1;
   unsigned x, y, z;

   flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* do texcoord wrapping and compute texel offsets */
   lp_build_sample_wrap_linear_float(bld,
                                     bld->format_desc->block.width,
                                     s, width_vec, offsets[0],
                                     bld->static_texture_state->pot_width,
                                     bld->static_sampler_state->wrap_s,
                                     &x_icoord0, &x_icoord1,
                                     &s_fpart,
                                     bld->static_sampler_state->force_nearest_s);

   if (dims >= 2) {
      lp_build_sample_wrap_linear_float(bld,
                                        bld->format_desc->block.height,
                                        t, height_vec, offsets[1],
                                        bld->static_texture_state->pot_height,
                                        bld->static_sampler_state->wrap_t,
                                        &y_icoord0, &y_icoord1,
                                        &t_fpart,
                                        bld->static_sampler_state->force_nearest_t);

      if (dims >= 3) {
         lp_build_sample_wrap_linear_float(bld,
                                           1, /* block length (depth) */
                                           r, depth_vec, offsets[2],
                                           bld->static_texture_state->pot_depth,
                                           bld->static_sampler_state->wrap_r,
                                           &z_icoord0, &z_icoord1,
                                           &r_fpart, 0);
      }
   }

   /*
    * From here on we deal with ints, and we should split up the 256bit
    * vectors manually for better generated code.
    */

   /* get pixel, row and image strides */
   x_stride = lp_build_const_vec(bld->gallivm,
                                 bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);
   y_stride = row_stride_vec;
   z_stride = img_stride_vec;

   /*
    * compute texel offset -
    * cannot do offset calc with floats, difficult for block-based formats,
    * and not enough precision anyway.
    */
   lp_build_sample_partial_offset(&bld->int_coord_bld,
                                  bld->format_desc->block.width,
                                  x_icoord0, x_stride,
                                  &x_offset0, &x_subcoord[0]);
   lp_build_sample_partial_offset(&bld->int_coord_bld,
                                  bld->format_desc->block.width,
                                  x_icoord1, x_stride,
                                  &x_offset1, &x_subcoord[1]);

   /* add potential cube/array/mip offsets now as they are constant per pixel */
   if (has_layer_coord(bld->static_texture_state->target)) {
      LLVMValueRef z_offset;
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      /* The r coord is the cube face in [0,5] or array layer */
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
   }
   if (mipoffsets) {
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
   }

   for (z = 0; z < 2; z++) {
      for (y = 0; y < 2; y++) {
         offset[z][y][0] = x_offset0;
         offset[z][y][1] = x_offset1;
      }
   }

   if (dims >= 2) {
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     bld->format_desc->block.height,
                                     y_icoord0, y_stride,
                                     &y_offset0, &y_subcoord[0]);
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     bld->format_desc->block.height,
                                     y_icoord1, y_stride,
                                     &y_offset1, &y_subcoord[1]);
      for (z = 0; z < 2; z++) {
         for (x = 0; x < 2; x++) {
            offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][0][x], y_offset0);
            offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][1][x], y_offset1);
         }
      }
   }

   if (dims >= 3) {
      LLVMValueRef z_subcoord[2];
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     1,
                                     z_icoord0, z_stride,
                                     &z_offset0, &z_subcoord[0]);
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     1,
                                     z_icoord1, z_stride,
                                     &z_offset1, &z_subcoord[1]);
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[0][y][x], z_offset0);
            offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[1][y][x], z_offset1);
         }
      }
   }

   lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
                                      x_subcoord, y_subcoord,
                                      s_fpart, t_fpart, r_fpart,
                                      colors);
}


/**
 * Sample the texture/mipmap using given image filter and mip filter.
 * data0_ptr and data1_ptr point to the two mipmap levels to sample
 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
 * If we're using nearest miplevel sampling the '1' values will be null/unused.
 */
static void
lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                       unsigned img_filter,
                       unsigned mip_filter,
                       LLVMValueRef s,
                       LLVMValueRef t,
                       LLVMValueRef r,
                       const LLVMValueRef *offsets,
                       LLVMValueRef ilevel0,
                       LLVMValueRef ilevel1,
                       LLVMValueRef lod_fpart,
                       LLVMValueRef colors_var)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef size0;
   LLVMValueRef size1;
   LLVMValueRef row_stride0_vec = NULL;
   LLVMValueRef row_stride1_vec = NULL;
   LLVMValueRef img_stride0_vec = NULL;
   LLVMValueRef img_stride1_vec = NULL;
   LLVMValueRef data_ptr0;
   LLVMValueRef data_ptr1;
   LLVMValueRef mipoff0 = NULL;
   LLVMValueRef mipoff1 = NULL;
   LLVMValueRef colors0;
   LLVMValueRef colors1;

   /* sample the first mipmap level */
   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
   if (bld->num_mips == 1) {
      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   }
   else {
      /* This path should work for num_lods 1 too but slightly less efficient */
      data_ptr0 = bld->base_ptr;
      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
   }

   if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
      if (img_filter == PIPE_TEX_FILTER_NEAREST) {
         lp_build_sample_image_nearest_afloat(bld,
                                              size0,
                                              row_stride0_vec, img_stride0_vec,
                                              data_ptr0, mipoff0, s, t, r, offsets,
                                              &colors0);
      }
      else {
         assert(img_filter == PIPE_TEX_FILTER_LINEAR);
         lp_build_sample_image_linear_afloat(bld,
                                             size0,
                                             row_stride0_vec, img_stride0_vec,
                                             data_ptr0, mipoff0, s, t, r, offsets,
                                             &colors0);
      }
   }
   else {
      if (img_filter == PIPE_TEX_FILTER_NEAREST) {
         lp_build_sample_image_nearest(bld,
                                       size0,
                                       row_stride0_vec, img_stride0_vec,
                                       data_ptr0, mipoff0, s, t, r, offsets,
                                       &colors0);
      }
      else {
         assert(img_filter == PIPE_TEX_FILTER_LINEAR);
         lp_build_sample_image_linear(bld,
                                      size0,
                                      row_stride0_vec, img_stride0_vec,
                                      data_ptr0, mipoff0, s, t, r, offsets,
                                      &colors0);
      }
   }

   /* Store the first level's colors in the output variables */
   LLVMBuildStore(builder, colors0, colors_var);

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
                                                     bld->lodf_bld.type, 256.0);
      LLVMTypeRef i32vec_type = bld->lodi_bld.vec_type;
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;
      unsigned num_quads = bld->coord_bld.type.length / 4;
      unsigned i;

      lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16vec_scale, "");
      lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16");
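      /* lod_fpart is now in 8-bit fixed point (scaled by 256), matching
       * the prescaled weights used by the image filters above */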

      /* need_lerp = lod_fpart > 0 */
      if (bld->num_lods == 1) {
         need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
                                   lod_fpart, bld->lodi_bld.zero,
                                   "need_lerp");
      }
      else {
         /*
          * We'll do mip filtering if any of the quads need it.
          * It might be better to split the vectors here and only fetch/filter
          * quads which need it.
          */
         /*
          * We need to clamp lod_fpart here since we can get negative
          * values which would screw up filtering if not all
          * lod_fpart values have same sign.
          * We can however then skip the greater than comparison.
          */
         lod_fpart = lp_build_max(&bld->lodi_bld, lod_fpart,
                                  bld->lodi_bld.zero);
         need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_fpart);
      }

      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
      {
         struct lp_build_context u8n_bld;

         lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));

         /* sample the second mipmap level */
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &size1,
                                     &row_stride1_vec, &img_stride1_vec);
         if (bld->num_mips == 1) {
            data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
         }
         else {
            data_ptr1 = bld->base_ptr;
            mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
         }

         if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
            if (img_filter == PIPE_TEX_FILTER_NEAREST) {
               lp_build_sample_image_nearest_afloat(bld,
                                                    size1,
                                                    row_stride1_vec, img_stride1_vec,
                                                    data_ptr1, mipoff1, s, t, r, offsets,
                                                    &colors1);
            }
            else {
               lp_build_sample_image_linear_afloat(bld,
                                                   size1,
                                                   row_stride1_vec, img_stride1_vec,
                                                   data_ptr1, mipoff1, s, t, r, offsets,
                                                   &colors1);
            }
         }
         else {
            if (img_filter == PIPE_TEX_FILTER_NEAREST) {
               lp_build_sample_image_nearest(bld,
                                             size1,
                                             row_stride1_vec, img_stride1_vec,
                                             data_ptr1, mipoff1, s, t, r, offsets,
                                             &colors1);
            }
            else {
               lp_build_sample_image_linear(bld,
                                            size1,
                                            row_stride1_vec, img_stride1_vec,
                                            data_ptr1, mipoff1, s, t, r, offsets,
                                            &colors1);
            }
         }

         /* interpolate samples from the two mipmap levels */

         if (num_quads == 1 && bld->num_lods == 1) {
            lod_fpart = LLVMBuildTrunc(builder, lod_fpart, u8n_bld.elem_type, "");
            lod_fpart = lp_build_broadcast_scalar(&u8n_bld, lod_fpart);
         }
         else {
            unsigned num_chans_per_lod = 4 * bld->coord_type.length / bld->num_lods;
            LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->lodi_bld.type.length);
            LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];

            /* Take the LSB of lod_fpart */
            lod_fpart = LLVMBuildTrunc(builder, lod_fpart, tmp_vec_type, "");

            /* Broadcast each lod weight into their respective channels */
            for (i = 0; i < u8n_bld.type.length; ++i) {
               shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_lod);
            }
            lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, LLVMGetUndef(tmp_vec_type),
                                               LLVMConstVector(shuffle, u8n_bld.type.length), "");
         }

         colors0 = lp_build_lerp(&u8n_bld, lod_fpart,
                                 colors0, colors1,
                                 LP_BLD_LERP_PRESCALED_WEIGHTS);

         LLVMBuildStore(builder, colors0, colors_var);
      }
      lp_build_endif(&if_ctx);
   }
}



/**
 * Texture sampling in AoS format. Used when sampling common 32-bit/texel
 * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes
 * but only limited texture coord wrap modes.
 */
void
lp_build_sample_aos(struct lp_build_sample_context *bld,
                    unsigned sampler_unit,
                    LLVMValueRef s,
                    LLVMValueRef t,
                    LLVMValueRef r,
                    const LLVMValueRef *offsets,
                    LLVMValueRef lod_positive,
                    LLVMValueRef lod_fpart,
                    LLVMValueRef ilevel0,
                    LLVMValueRef ilevel1,
                    LLVMValueRef texel_out[4])
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
   const unsigned min_filter = bld->static_sampler_state->min_img_filter;
   const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
   const unsigned dims = bld->dims;
   LLVMValueRef packed_var, packed;
   LLVMValueRef unswizzled[4];
   struct lp_build_context u8n_bld;

   /* we only support the common/simple wrap modes at this time */
   assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_s));
   if (dims >= 2)
      assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_t));
   if (dims >= 3)
      assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_r));


   /* make 8-bit unorm builder context */
   lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));

   /*
    * Get/interpolate texture colors.
    */

   packed_var = lp_build_alloca(bld->gallivm, u8n_bld.vec_type, "packed_var");
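   /* packed_var holds the filtered colors as 8-bit unorm AoS values; it
    * is a variable so both filter branches below can store into it */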

   if (min_filter == mag_filter) {
      /* no need to distinguish between minification and magnification */
      lp_build_sample_mipmap(bld,
                             min_filter, mip_filter,
                             s, t, r, offsets,
                             ilevel0, ilevel1, lod_fpart,
                             packed_var);
   }
   else {
      /* Emit conditional to choose min image filter or mag image filter
       * depending on the lod being > 0 or <= 0, respectively.
       */
      struct lp_build_if_state if_ctx;

      /*
       * FIXME this should take all lods into account, if some are min
       * some max probably could hack up the weights in the linear
       * path with selects to work for nearest.
       */
      if (bld->num_lods > 1)
         lod_positive = LLVMBuildExtractElement(builder, lod_positive,
                                                lp_build_const_int32(bld->gallivm, 0), "");

      lod_positive = LLVMBuildTrunc(builder, lod_positive,
                                    LLVMInt1TypeInContext(bld->gallivm->context), "");

      lp_build_if(&if_ctx, bld->gallivm, lod_positive);
      {
         /* Use the minification filter */
         lp_build_sample_mipmap(bld,
                                min_filter, mip_filter,
                                s, t, r, offsets,
                                ilevel0, ilevel1, lod_fpart,
                                packed_var);
      }
      lp_build_else(&if_ctx);
      {
         /* Use the magnification filter */
         lp_build_sample_mipmap(bld,
                                mag_filter, PIPE_TEX_MIPFILTER_NONE,
                                s, t, r, offsets,
                                ilevel0, NULL, NULL,
                                packed_var);
      }
      lp_build_endif(&if_ctx);
   }

   packed = LLVMBuildLoad(builder, packed_var, "");

   /*
    * Convert to SoA and swizzle.
    */
   lp_build_rgba8_to_fi32_soa(bld->gallivm,
                              bld->texel_type,
                              packed, unswizzled);

   if (util_format_is_rgba8_variant(bld->format_desc)) {
      lp_build_format_swizzle_soa(bld->format_desc,
                                  &bld->texel_bld,
                                  unswizzled, texel_out);
   }
   else {
      texel_out[0] = unswizzled[0];
      texel_out[1] = unswizzled[1];
      texel_out[2] = unswizzled[2];
      texel_out[3] = unswizzled[3];
   }
}