gallivm: (trivial) fix linear aos sampling of 3d compressed formats
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Texture sampling -- AoS.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
34 */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "util/u_debug.h"
39 #include "util/u_dump.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/u_format.h"
43 #include "util/u_cpu_detect.h"
44 #include "lp_bld_debug.h"
45 #include "lp_bld_type.h"
46 #include "lp_bld_const.h"
47 #include "lp_bld_conv.h"
48 #include "lp_bld_arit.h"
49 #include "lp_bld_bitarit.h"
50 #include "lp_bld_logic.h"
51 #include "lp_bld_swizzle.h"
52 #include "lp_bld_pack.h"
53 #include "lp_bld_flow.h"
54 #include "lp_bld_gather.h"
55 #include "lp_bld_format.h"
56 #include "lp_bld_init.h"
57 #include "lp_bld_sample.h"
58 #include "lp_bld_sample_aos.h"
59 #include "lp_bld_quad.h"
60
61
/**
 * Build LLVM code for texture coord wrapping, for nearest filtering,
 * for scaled integer texcoords.
 *
 * Only PIPE_TEX_WRAP_REPEAT and PIPE_TEX_WRAP_CLAMP_TO_EDGE are handled;
 * every other wrap mode hits the assert below (presumably those modes are
 * routed to a different sampling path — TODO confirm against callers).
 *
 * \param block_length is the length of the pixel block along the
 *                     coordinate axis
 * \param coord the incoming texcoord (s,t or r) scaled to the texture size
 * \param coord_f the incoming texcoord (s,t or r) as float vec
 * \param length the texture size along one dimension
 * \param stride pixel stride along the coordinate axis (in bytes)
 * \param offset the texel offset along the coord axis
 * \param is_pot if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \param out_offset byte offset for the wrapped coordinate
 * \param out_i resulting sub-block pixel coordinate for coord0
 */
static void
lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
                                 unsigned block_length,
                                 LLVMValueRef coord,
                                 LLVMValueRef coord_f,
                                 LLVMValueRef length,
                                 LLVMValueRef stride,
                                 LLVMValueRef offset,
                                 boolean is_pot,
                                 unsigned wrap_mode,
                                 LLVMValueRef *out_offset,
                                 LLVMValueRef *out_i)
{
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one;

   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if(is_pot)
         /* POT size: coord mod length is just a bitwise AND with length-1 */
         coord = LLVMBuildAnd(builder, coord, length_minus_one, "");
      else {
         /* NPOT size: redo the wrap in float as fract(coord) * length,
          * truncated back to int.
          */
         struct lp_build_context *coord_bld = &bld->coord_bld;
         LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
         if (offset) {
            /* convert the texel offset to a normalized coord offset
             * so it can be folded in before the fract
             */
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length_f);
            coord_f = lp_build_add(coord_bld, coord_f, offset);
         }
         coord = lp_build_fract_safe(coord_bld, coord_f);
         coord = lp_build_mul(coord_bld, coord, length_f);
         coord = lp_build_itrunc(coord_bld, coord);
      }
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      /* clamp coord to [0, length - 1] */
      coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
      coord = lp_build_min(int_coord_bld, coord, length_minus_one);
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      assert(0);
   }

   /* split the wrapped coord into a byte offset plus the pixel's
    * position within its (compressed) block
    */
   lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
                                  out_offset, out_i);
}
132
133
/**
 * Build LLVM code for texture coord wrapping, for nearest filtering,
 * for float texcoords.
 *
 * Only PIPE_TEX_WRAP_REPEAT and PIPE_TEX_WRAP_CLAMP_TO_EDGE are handled;
 * other wrap modes hit the assert below.
 *
 * \param coord the incoming texcoord (s,t or r)
 * \param length the texture size along one dimension
 * \param offset the texel offset along the coord axis
 * \param is_pot if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \param icoord the texcoord after wrapping, as int
 */
static void
lp_build_sample_wrap_nearest_float(struct lp_build_sample_context *bld,
                                   LLVMValueRef coord,
                                   LLVMValueRef length,
                                   LLVMValueRef offset,
                                   boolean is_pot,
                                   unsigned wrap_mode,
                                   LLVMValueRef *icoord)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   LLVMValueRef length_minus_one;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (offset) {
         /* this is definitely not ideal for POT case */
         /* fold the texel offset in as a normalized coord offset */
         offset = lp_build_int_to_float(coord_bld, offset);
         offset = lp_build_div(coord_bld, offset, length);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* take fraction, unnormalize */
      coord = lp_build_fract_safe(coord_bld, coord);
      coord = lp_build_mul(coord_bld, coord, length);
      *icoord = lp_build_itrunc(coord_bld, coord);
      break;
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length);
      }
      /* texel offset is applied in unnormalized texel space here */
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* clamp to [0, length - 1], then truncate to int */
      coord = lp_build_clamp(coord_bld, coord, coord_bld->zero,
                             length_minus_one);
      *icoord = lp_build_itrunc(coord_bld, coord);
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      assert(0);
   }
}
194
195
/**
 * Build LLVM code for texture coord wrapping, for linear filtering,
 * for scaled integer texcoords.
 *
 * Produces the two neighboring texel offsets (and, for multi-pixel blocks,
 * sub-block coords) needed for linear filtering along one axis.  For NPOT
 * REPEAT the 8-bit lerp weight is also recomputed and stored through
 * \p weight_i.  Only REPEAT and CLAMP_TO_EDGE are handled; other wrap
 * modes hit the asserts below.
 *
 * \param block_length is the length of the pixel block along the
 *                     coordinate axis
 * \param coord0 the incoming texcoord (s,t or r) scaled to the texture size
 * \param weight_i output: 8-bit fixed point lerp weight (NPOT REPEAT only)
 * \param coord_f the incoming texcoord (s,t or r) as float vec
 * \param length the texture size along one dimension
 * \param stride pixel stride along the coordinate axis (in bytes)
 * \param offset the texel offset along the coord axis
 * \param is_pot if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \param offset0 resulting relative offset for coord0
 * \param offset1 resulting relative offset for coord0 + 1
 * \param i0 resulting sub-block pixel coordinate for coord0
 * \param i1 resulting sub-block pixel coordinate for coord0 + 1
 */
static void
lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
                                unsigned block_length,
                                LLVMValueRef coord0,
                                LLVMValueRef *weight_i,
                                LLVMValueRef coord_f,
                                LLVMValueRef length,
                                LLVMValueRef stride,
                                LLVMValueRef offset,
                                boolean is_pot,
                                unsigned wrap_mode,
                                LLVMValueRef *offset0,
                                LLVMValueRef *offset1,
                                LLVMValueRef *i0,
                                LLVMValueRef *i1)
{
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one;
   LLVMValueRef lmask, umask, mask;

   /*
    * If the pixel block covers more than one pixel then there is no easy
    * way to calculate offset1 relative to offset0. Instead, compute them
    * independently. Otherwise, try to compute offset0 and offset1 with
    * a single stride multiplication.
    */

   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);

   if (block_length != 1) {
      /* multi-pixel blocks (e.g. compressed formats): wrap both coords,
       * then compute each offset with a full partial-offset calculation.
       */
      LLVMValueRef coord1;
      switch(wrap_mode) {
      case PIPE_TEX_WRAP_REPEAT:
         if (is_pot) {
            /* POT: wrap both neighbors with a bitwise AND */
            coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
            coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
            coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
         }
         else {
            LLVMValueRef mask;
            LLVMValueRef weight;
            LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
            if (offset) {
               offset = lp_build_int_to_float(&bld->coord_bld, offset);
               offset = lp_build_div(&bld->coord_bld, offset, length_f);
               coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
            }
            lp_build_coord_repeat_npot_linear(bld, coord_f,
                                              length, length_f,
                                              &coord0, &weight);
            /* mask is all-ones unless coord0 == length-1; ANDing coord0+1
             * with it wraps the neighbor back to 0 at the edge
             */
            mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
                                    PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
            coord1 = LLVMBuildAnd(builder,
                                  lp_build_add(int_coord_bld, coord0,
                                               int_coord_bld->one),
                                  mask, "");
            /* convert weight to 8-bit fixed point */
            weight = lp_build_mul_imm(&bld->coord_bld, weight, 256);
            *weight_i = lp_build_itrunc(&bld->coord_bld, weight);
         }
         break;

      case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         coord0 = lp_build_clamp(int_coord_bld, coord0, int_coord_bld->zero,
                                 length_minus_one);
         coord1 = lp_build_clamp(int_coord_bld, coord1, int_coord_bld->zero,
                                 length_minus_one);
         break;

      case PIPE_TEX_WRAP_CLAMP:
      case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      case PIPE_TEX_WRAP_MIRROR_REPEAT:
      case PIPE_TEX_WRAP_MIRROR_CLAMP:
      case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      default:
         assert(0);
         coord0 = int_coord_bld->zero;
         coord1 = int_coord_bld->zero;
         break;
      }
      lp_build_sample_partial_offset(int_coord_bld, block_length, coord0, stride,
                                     offset0, i0);
      lp_build_sample_partial_offset(int_coord_bld, block_length, coord1, stride,
                                     offset1, i1);
      return;
   }

   /* single-pixel blocks: sub-block coords are always 0 and offset1 can be
    * derived from offset0 with one add (masked at the wrap edge)
    */
   *i0 = int_coord_bld->zero;
   *i1 = int_coord_bld->zero;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
      }
      else {
         LLVMValueRef weight;
         LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
         if (offset) {
            offset = lp_build_int_to_float(&bld->coord_bld, offset);
            offset = lp_build_div(&bld->coord_bld, offset, length_f);
            coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
         }
         lp_build_coord_repeat_npot_linear(bld, coord_f,
                                           length, length_f,
                                           &coord0, &weight);
         weight = lp_build_mul_imm(&bld->coord_bld, weight, 256);
         *weight_i = lp_build_itrunc(&bld->coord_bld, weight);
      }

      /* zero out offset1's delta when coord0 is the last texel,
       * wrapping the neighbor to offset 0
       */
      mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
                              PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);

      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
      *offset1 = LLVMBuildAnd(builder,
                              lp_build_add(int_coord_bld, *offset0, stride),
                              mask, "");
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      /* XXX this might be slower than the separate path
       * on some newer cpus. With sse41 this is 8 instructions vs. 7
       * - at least on SNB this is almost certainly slower since
       * min/max are cheaper than selects, and the muls aren't bad.
       */
      lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                               PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
      umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                               PIPE_FUNC_LESS, coord0, length_minus_one);

      /* clamp coord0 to [0, length-1] via selects */
      coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
      coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);

      /* stride delta only applies when coord0 was strictly inside range */
      mask = LLVMBuildAnd(builder, lmask, umask, "");

      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
      *offset1 = lp_build_add(int_coord_bld,
                              *offset0,
                              LLVMBuildAnd(builder, stride, mask, ""));
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      assert(0);
      *offset0 = int_coord_bld->zero;
      *offset1 = int_coord_bld->zero;
      break;
   }
}
369
370
/**
 * Build LLVM code for texture coord wrapping, for linear filtering,
 * for float texcoords.
 *
 * Only REPEAT and CLAMP_TO_EDGE are handled; other modes hit the assert.
 * The computed weight is converted to 8-bit fixed point (0-255) at the end.
 *
 * \param block_length is the length of the pixel block along the
 *                     coordinate axis (currently unused in this function)
 * \param coord the incoming texcoord (s,t or r)
 * \param length the texture size along one dimension
 * \param offset the texel offset along the coord axis
 * \param is_pot if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \param coord0 the first texcoord after wrapping, as int
 * \param coord1 the second texcoord after wrapping, as int
 * \param weight the filter weight as int (0-255)
 * \param force_nearest if this coord actually uses nearest filtering
 *                      (skips the 0.5 texel-center adjustment)
 */
static void
lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld,
                                  unsigned block_length,
                                  LLVMValueRef coord,
                                  LLVMValueRef length,
                                  LLVMValueRef offset,
                                  boolean is_pot,
                                  unsigned wrap_mode,
                                  LLVMValueRef *coord0,
                                  LLVMValueRef *coord1,
                                  LLVMValueRef *weight,
                                  unsigned force_nearest)
{
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
   LLVMValueRef length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         /* mul by size and subtract 0.5 */
         coord = lp_build_mul(coord_bld, coord, length);
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         if (!force_nearest)
            coord = lp_build_sub(coord_bld, coord, half);
         *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
         *coord1 = lp_build_ifloor(coord_bld, *coord1);
         /* repeat wrap */
         length_minus_one = lp_build_itrunc(coord_bld, length_minus_one);
         *coord0 = LLVMBuildAnd(builder, *coord0, length_minus_one, "");
         *coord1 = LLVMBuildAnd(builder, *coord1, length_minus_one, "");
      }
      else {
         LLVMValueRef mask;
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* wrap with normalized floats is just fract */
         coord = lp_build_fract(coord_bld, coord);
         /* unnormalize */
         coord = lp_build_mul(coord_bld, coord, length);
         /*
          * we avoided the 0.5/length division, have to fix up wrong
          * edge cases with selects
          */
         *coord1 = lp_build_add(coord_bld, coord, half);
         coord = lp_build_sub(coord_bld, coord, half);
         *weight = lp_build_fract(coord_bld, coord);
         /* coord0 < 0 wraps to the last texel */
         mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
                                 PIPE_FUNC_LESS, coord, coord_bld->zero);
         *coord0 = lp_build_select(coord_bld, mask, length_minus_one, coord);
         *coord0 = lp_build_itrunc(coord_bld, *coord0);
         /* coord1 >= length wraps back to texel 0 */
         mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
                                 PIPE_FUNC_LESS, *coord1, length);
         *coord1 = lp_build_select(coord_bld, mask, *coord1, coord_bld->zero);
         *coord1 = lp_build_itrunc(coord_bld, *coord1);
      }
      break;
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* mul by tex size */
         coord = lp_build_mul(coord_bld, coord, length);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* subtract 0.5 */
      if (!force_nearest) {
         coord = lp_build_sub(coord_bld, coord, half);
      }
      /* clamp to [0, length - 1] */
      coord = lp_build_min(coord_bld, coord, length_minus_one);
      coord = lp_build_max(coord_bld, coord, coord_bld->zero);
      *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
      /* coord1 = min(coord1, length-1) */
      *coord1 = lp_build_min(coord_bld, *coord1, length_minus_one);
      *coord1 = lp_build_itrunc(coord_bld, *coord1);
      break;
   default:
      assert(0);
      *coord0 = int_coord_bld->zero;
      *coord1 = int_coord_bld->zero;
      *weight = coord_bld->zero;
      break;
   }
   /* scale weight to 8-bit fixed point (0-255) */
   *weight = lp_build_mul_imm(coord_bld, *weight, 256);
   *weight = lp_build_itrunc(coord_bld, *weight);
   return;
}
487
488
/**
 * Fetch texels for image with nearest sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 *
 * \param data_ptr base pointer of the texture image data
 * \param offset per-pixel byte offsets of the texels to fetch
 * \param x_subcoord,y_subcoord sub-block pixel coords (used only for the
 *        generic non-rgba8 fetch path)
 * \param colors output: fetched texels as packed 8-bit unorm vector
 */
static void
lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
                                    LLVMValueRef data_ptr,
                                    LLVMValueRef offset,
                                    LLVMValueRef x_subcoord,
                                    LLVMValueRef y_subcoord,
                                    LLVMValueRef *colors)
{
   /*
    * Fetch the pixels as 4 x 32bit (rgba order might differ):
    *
    *   rgba0 rgba1 rgba2 rgba3
    *
    * bit cast them into 16 x u8
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    *
    * unpack them into two 8 x i16:
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1
    *   r2 g2 b2 a2 r3 g3 b3 a3
    *
    * The higher 8 bits of the resulting elements will be zero.
    */
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef rgba8;
   struct lp_build_context u8n;
   LLVMTypeRef u8n_vec_type;

   lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
   u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);

   if (util_format_is_rgba8_variant(bld->format_desc)) {
      /*
       * Given the format is a rgba8, just read the pixels as is,
       * without any swizzling. Swizzling will be done later.
       */
      rgba8 = lp_build_gather(bld->gallivm,
                              bld->texel_type.length,
                              bld->format_desc->block.bits,
                              bld->texel_type.width,
                              data_ptr, offset, TRUE);

      rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
   }
   else {
      /* generic path: decode via the format description (handles
       * compressed/odd formats, needs the sub-block coords)
       */
      rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
                                      bld->format_desc,
                                      u8n.type,
                                      data_ptr, offset,
                                      x_subcoord,
                                      y_subcoord);
   }

   *colors = rgba8;
}
549
550
/**
 * Sample a single texture image with nearest sampling.
 * If sampling a cube texture, r = cube face in [0,5].
 * Return filtered color as two vectors of 16-bit fixed point values.
 *
 * Coordinates are converted to 24.8 fixed point ints, wrapped per-axis,
 * and summed into a single byte offset used for the gather.
 */
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                              LLVMValueRef int_size,
                              LLVMValueRef row_stride_vec,
                              LLVMValueRef img_stride_vec,
                              LLVMValueRef data_ptr,
                              LLVMValueRef mipoffsets,
                              LLVMValueRef s,
                              LLVMValueRef t,
                              LLVMValueRef r,
                              const LLVMValueRef *offsets,
                              LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context i32;
   LLVMTypeRef i32_vec_type;
   LLVMValueRef i32_c8;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL;
   LLVMValueRef s_float, t_float = NULL, r_float = NULL;
   LLVMValueRef x_stride;
   LLVMValueRef x_offset, offset;
   LLVMValueRef x_subcoord, y_subcoord, z_subcoord;

   lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));

   i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);

   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                int_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* keep the original float coords around for the NPOT wrap paths */
   s_float = s; t_float = t; r_float = r;

   if (bld->static_sampler_state->normalized_coords) {
      LLVMValueRef scaled_size;
      LLVMValueRef flt_size;

      /* scale size by 256 (8 fractional bits) */
      scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);

      flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);

      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
   }
   else {
      /* scale coords by 256 (8 fractional bits) */
      s = lp_build_mul_imm(&bld->coord_bld, s, 256);
      if (dims >= 2)
         t = lp_build_mul_imm(&bld->coord_bld, t, 256);
      if (dims >= 3)
         r = lp_build_mul_imm(&bld->coord_bld, r, 256);
   }

   /* convert float to int */
   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
   if (dims >= 2)
      t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
   if (dims >= 3)
      r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");

   /* compute floor (shift right 8) */
   i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   if (dims >= 2)
      t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
   if (dims >= 3)
      r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");

   /* add texel offsets */
   if (offsets[0]) {
      s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
      if (dims >= 2) {
         t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
         if (dims >= 3) {
            r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
         }
      }
   }

   /* get pixel, row, image strides */
   x_stride = lp_build_const_vec(bld->gallivm,
                                 bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);

   /* Do texcoord wrapping, compute texel offset */
   lp_build_sample_wrap_nearest_int(bld,
                                    bld->format_desc->block.width,
                                    s_ipart, s_float,
                                    width_vec, x_stride, offsets[0],
                                    bld->static_texture_state->pot_width,
                                    bld->static_sampler_state->wrap_s,
                                    &x_offset, &x_subcoord);
   offset = x_offset;
   if (dims >= 2) {
      LLVMValueRef y_offset;
      lp_build_sample_wrap_nearest_int(bld,
                                       bld->format_desc->block.height,
                                       t_ipart, t_float,
                                       height_vec, row_stride_vec, offsets[1],
                                       bld->static_texture_state->pot_height,
                                       bld->static_sampler_state->wrap_t,
                                       &y_offset, &y_subcoord);
      offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
      if (dims >= 3) {
         LLVMValueRef z_offset;
         /* pixel blocks are 2D; depth always has block length 1 */
         lp_build_sample_wrap_nearest_int(bld,
                                          1, /* block length (depth) */
                                          r_ipart, r_float,
                                          depth_vec, img_stride_vec, offsets[2],
                                          bld->static_texture_state->pot_depth,
                                          bld->static_sampler_state->wrap_r,
                                          &z_offset, &z_subcoord);
         offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
      }
   }
   if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
      LLVMValueRef z_offset;
      /* The r coord is the cube face in [0,5] or array layer */
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
   }
   if (mipoffsets) {
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
   }

   lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
                                       x_subcoord, y_subcoord,
                                       colors);
}
693
694
/**
 * Sample a single texture image with nearest sampling.
 * If sampling a cube texture, r = cube face in [0,5].
 * Return filtered color as two vectors of 16-bit fixed point values.
 * Does address calcs (except offsets) with floats.
 * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
 */
static void
lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld,
                                     LLVMValueRef int_size,
                                     LLVMValueRef row_stride_vec,
                                     LLVMValueRef img_stride_vec,
                                     LLVMValueRef data_ptr,
                                     LLVMValueRef mipoffsets,
                                     LLVMValueRef s,
                                     LLVMValueRef t,
                                     LLVMValueRef r,
                                     const LLVMValueRef *offsets,
                                     LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef offset;
   LLVMValueRef x_subcoord, y_subcoord;
   LLVMValueRef x_icoord = NULL, y_icoord = NULL, z_icoord = NULL;
   LLVMValueRef flt_size;

   flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* Do texcoord wrapping */
   lp_build_sample_wrap_nearest_float(bld,
                                      s, width_vec, offsets[0],
                                      bld->static_texture_state->pot_width,
                                      bld->static_sampler_state->wrap_s,
                                      &x_icoord);

   if (dims >= 2) {
      lp_build_sample_wrap_nearest_float(bld,
                                         t, height_vec, offsets[1],
                                         bld->static_texture_state->pot_height,
                                         bld->static_sampler_state->wrap_t,
                                         &y_icoord);

      if (dims >= 3) {
         lp_build_sample_wrap_nearest_float(bld,
                                            r, depth_vec, offsets[2],
                                            bld->static_texture_state->pot_depth,
                                            bld->static_sampler_state->wrap_r,
                                            &z_icoord);
      }
   }
   if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
      /* r is the cube face / array layer index; no wrapping needed */
      z_icoord = r;
   }

   /*
    * From here on we deal with ints, and we should split up the 256bit
    * vectors manually for better generated code.
    */

   /*
    * compute texel offsets -
    * cannot do offset calc with floats, difficult for block-based formats,
    * and not enough precision anyway.
    */
   lp_build_sample_offset(&bld->int_coord_bld,
                          bld->format_desc,
                          x_icoord, y_icoord,
                          z_icoord,
                          row_stride_vec, img_stride_vec,
                          &offset,
                          &x_subcoord, &y_subcoord);
   if (mipoffsets) {
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
   }

   lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
                                       x_subcoord, y_subcoord,
                                       colors);
}
785
786
/**
 * Fetch texels for image with linear sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 *
 * Fetches the 2/4/8 neighboring texels (depending on dims) and lerps them
 * with the given 8-bit fixed point fractional weights.
 *
 * \param offset per-pixel byte offsets of the neighbor texels, [z][y][x]
 * \param x_subcoord,y_subcoord sub-block pixel coords for coord and coord+1
 * \param s_fpart,t_fpart,r_fpart 8-bit fixed point lerp weights per axis
 * \param colors output: filtered texels as packed 8-bit unorm vector
 */
static void
lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
                                   LLVMValueRef data_ptr,
                                   LLVMValueRef offset[2][2][2],
                                   LLVMValueRef x_subcoord[2],
                                   LLVMValueRef y_subcoord[2],
                                   LLVMValueRef s_fpart,
                                   LLVMValueRef t_fpart,
                                   LLVMValueRef r_fpart,
                                   LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context u8n;
   LLVMTypeRef u8n_vec_type;
   LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
   LLVMValueRef shuffle;
   LLVMValueRef neighbors[2][2][2]; /* [z][y][x] */
   LLVMValueRef packed;
   unsigned i, j, k;
   unsigned numj, numk;

   lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
   u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);

   /*
    * Transform 4 x i32 in
    *
    *   s_fpart = {s0, s1, s2, s3}
    *
    * where each value is between 0 and 0xff,
    *
    * into one 16 x i8
    *
    *   s_fpart = {s0, s0, s0, s0, s1, s1, s1, s1, s2, s2, s2, s2, s3, s3, s3, s3}
    *
    * and likewise for t_fpart. There is no risk of losing precision here
    * since the fractional parts only use the lower 8bits.
    */
   s_fpart = LLVMBuildBitCast(builder, s_fpart, u8n_vec_type, "");
   if (dims >= 2)
      t_fpart = LLVMBuildBitCast(builder, t_fpart, u8n_vec_type, "");
   if (dims >= 3)
      r_fpart = LLVMBuildBitCast(builder, r_fpart, u8n_vec_type, "");

   /* build a shuffle that broadcasts the low byte of each i32 lane
    * to all 4 bytes of that lane (byte 0 on LE, byte 3 on BE)
    */
   for (j = 0; j < u8n.type.length; j += 4) {
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      unsigned subindex = 0;
#else
      unsigned subindex = 3;
#endif
      LLVMValueRef index;

      index = LLVMConstInt(elem_type, j + subindex, 0);
      for (i = 0; i < 4; ++i)
         shuffles[j + i] = index;
   }

   shuffle = LLVMConstVector(shuffles, u8n.type.length);

   s_fpart = LLVMBuildShuffleVector(builder, s_fpart, u8n.undef,
                                    shuffle, "");
   if (dims >= 2) {
      t_fpart = LLVMBuildShuffleVector(builder, t_fpart, u8n.undef,
                                       shuffle, "");
   }
   if (dims >= 3) {
      r_fpart = LLVMBuildShuffleVector(builder, r_fpart, u8n.undef,
                                       shuffle, "");
   }

   /*
    * Fetch the pixels as 4 x 32bit (rgba order might differ):
    *
    *   rgba0 rgba1 rgba2 rgba3
    *
    * bit cast them into 16 x u8
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    *
    * unpack them into two 8 x i16:
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1
    *   r2 g2 b2 a2 r3 g3 b3 a3
    *
    * The higher 8 bits of the resulting elements will be zero.
    */
   numj = 1 + (dims >= 2);
   numk = 1 + (dims >= 3);

   /* gather all 2 x numj x numk neighbor texels */
   for (k = 0; k < numk; k++) {
      for (j = 0; j < numj; j++) {
         for (i = 0; i < 2; i++) {
            LLVMValueRef rgba8;

            if (util_format_is_rgba8_variant(bld->format_desc)) {
               /*
                * Given the format is a rgba8, just read the pixels as is,
                * without any swizzling. Swizzling will be done later.
                */
               rgba8 = lp_build_gather(bld->gallivm,
                                       bld->texel_type.length,
                                       bld->format_desc->block.bits,
                                       bld->texel_type.width,
                                       data_ptr, offset[k][j][i], TRUE);

               rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
            }
            else {
               rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
                                               bld->format_desc,
                                               u8n.type,
                                               data_ptr, offset[k][j][i],
                                               x_subcoord[i],
                                               y_subcoord[j]);
            }

            neighbors[k][j][i] = rgba8;
         }
      }
   }

   /*
    * Linear interpolation with 8.8 fixed point.
    */
   if (bld->static_sampler_state->force_nearest_s) {
      /* special case 1-D lerp */
      packed = lp_build_lerp(&u8n,
                             t_fpart,
                             neighbors[0][0][0],
                             neighbors[0][0][1],
                             LP_BLD_LERP_PRESCALED_WEIGHTS);
   }
   else if (bld->static_sampler_state->force_nearest_t) {
      /* special case 1-D lerp */
      packed = lp_build_lerp(&u8n,
                             s_fpart,
                             neighbors[0][0][0],
                             neighbors[0][0][1],
                             LP_BLD_LERP_PRESCALED_WEIGHTS);
   }
   else {
      /* general 1/2/3-D lerping */
      if (dims == 1) {
         packed = lp_build_lerp(&u8n,
                                s_fpart,
                                neighbors[0][0][0],
                                neighbors[0][0][1],
                                LP_BLD_LERP_PRESCALED_WEIGHTS);
      } else if (dims == 2) {
         /* 2-D lerp */
         packed = lp_build_lerp_2d(&u8n,
                                   s_fpart, t_fpart,
                                   neighbors[0][0][0],
                                   neighbors[0][0][1],
                                   neighbors[0][1][0],
                                   neighbors[0][1][1],
                                   LP_BLD_LERP_PRESCALED_WEIGHTS);
      } else {
         /* 3-D lerp */
         assert(dims == 3);
         packed = lp_build_lerp_3d(&u8n,
                                   s_fpart, t_fpart, r_fpart,
                                   neighbors[0][0][0],
                                   neighbors[0][0][1],
                                   neighbors[0][1][0],
                                   neighbors[0][1][1],
                                   neighbors[1][0][0],
                                   neighbors[1][0][1],
                                   neighbors[1][1][0],
                                   neighbors[1][1][1],
                                   LP_BLD_LERP_PRESCALED_WEIGHTS);
      }
   }

   *colors = packed;
}
969
/**
 * Sample a single texture image with (bi-)(tri-)linear sampling.
 * Address math is done with integer vectors in 8.8 fixed point (the
 * coordinates are scaled by 256, so 8 fractional weight bits).
 * The filtered color is returned in *colors as produced by
 * lp_build_sample_fetch_image_linear; given the PRESCALED_WEIGHTS lerps
 * used there this is a packed 8-bit unorm vector (the old "two vectors of
 * 16-bit fixed point" wording predates that path — NOTE(review): confirm).
 *
 * \param int_size        integer vector with the level's width/height/depth
 * \param row_stride_vec  row stride in bytes (NULL for 1D)
 * \param img_stride_vec  image/slice stride in bytes (NULL for non-3D/array)
 * \param data_ptr        base pointer of the mip level (or of the whole
 *                        mip tree when mipoffsets is non-NULL)
 * \param mipoffsets      per-pixel byte offsets to the mip level, or NULL
 * \param offsets         optional texel offsets (TEXEL_OFFSET), offsets[0]
 *                        NULL means no offsets at all
 * \param colors          out: packed filtered texels
 */
static void
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                             LLVMValueRef int_size,
                             LLVMValueRef row_stride_vec,
                             LLVMValueRef img_stride_vec,
                             LLVMValueRef data_ptr,
                             LLVMValueRef mipoffsets,
                             LLVMValueRef s,
                             LLVMValueRef t,
                             LLVMValueRef r,
                             const LLVMValueRef *offsets,
                             LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context i32;
   LLVMTypeRef i32_vec_type;
   LLVMValueRef i32_c8, i32_c128, i32_c255;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_ipart, s_fpart, s_float;
   LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_float = NULL;
   LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_float = NULL;
   LLVMValueRef x_stride, y_stride, z_stride;
   LLVMValueRef x_offset0, x_offset1;
   LLVMValueRef y_offset0, y_offset1;
   LLVMValueRef z_offset0, z_offset1;
   LLVMValueRef offset[2][2][2]; /* [z][y][x] */
   LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
   unsigned x, y, z;

   lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));

   i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);

   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                int_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* keep the unscaled float coords around, the wrap code also needs them */
   s_float = s; t_float = t; r_float = r;

   if (bld->static_sampler_state->normalized_coords) {
      LLVMValueRef scaled_size;
      LLVMValueRef flt_size;

      /* scale size by 256 (8 fractional bits) */
      scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);

      flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);

      /* this multiplies the [0,1] coords up to fixed-point texel space */
      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
   }
   else {
      /* scale coords by 256 (8 fractional bits) */
      s = lp_build_mul_imm(&bld->coord_bld, s, 256);
      if (dims >= 2)
         t = lp_build_mul_imm(&bld->coord_bld, t, 256);
      if (dims >= 3)
         r = lp_build_mul_imm(&bld->coord_bld, r, 256);
   }

   /* convert float to int */
   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
   if (dims >= 2)
      t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
   if (dims >= 3)
      r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");

   /* subtract 0.5 (add -128) -- linear filtering centers on texel centers,
    * skipped per-axis when that axis is forced to nearest */
   i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
   if (!bld->static_sampler_state->force_nearest_s) {
      s = LLVMBuildAdd(builder, s, i32_c128, "");
   }
   if (dims >= 2 && !bld->static_sampler_state->force_nearest_t) {
      t = LLVMBuildAdd(builder, t, i32_c128, "");
   }
   if (dims >= 3) {
      r = LLVMBuildAdd(builder, r, i32_c128, "");
   }

   /* compute floor (shift right 8) -- arithmetic shift handles negatives */
   i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   if (dims >= 2)
      t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
   if (dims >= 3)
      r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");

   /* add texel offsets (offsets[0] == NULL means no offsets were given) */
   if (offsets[0]) {
      s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
      if (dims >= 2) {
         t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
         if (dims >= 3) {
            r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
         }
      }
   }

   /* compute fractional part (AND with 0xff) -- the 8-bit lerp weights */
   i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
   s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
   if (dims >= 2)
      t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
   if (dims >= 3)
      r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");

   /* get pixel, row and image strides */
   x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);
   y_stride = row_stride_vec;
   z_stride = img_stride_vec;

   /* do texcoord wrapping and compute texel offsets,
    * yielding byte offsets and sub-block coords for both neighbors per axis */
   lp_build_sample_wrap_linear_int(bld,
                                   bld->format_desc->block.width,
                                   s_ipart, &s_fpart, s_float,
                                   width_vec, x_stride, offsets[0],
                                   bld->static_texture_state->pot_width,
                                   bld->static_sampler_state->wrap_s,
                                   &x_offset0, &x_offset1,
                                   &x_subcoord[0], &x_subcoord[1]);

   /* add potential cube/array/mip offsets now as they are constant per pixel */
   if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
      LLVMValueRef z_offset;
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      /* The r coord is the cube face in [0,5] or array layer */
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
   }
   if (mipoffsets) {
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
   }

   /* seed the 2x2x2 offset cube with the two x offsets */
   for (z = 0; z < 2; z++) {
      for (y = 0; y < 2; y++) {
         offset[z][y][0] = x_offset0;
         offset[z][y][1] = x_offset1;
      }
   }

   if (dims >= 2) {
      lp_build_sample_wrap_linear_int(bld,
                                      bld->format_desc->block.height,
                                      t_ipart, &t_fpart, t_float,
                                      height_vec, y_stride, offsets[1],
                                      bld->static_texture_state->pot_height,
                                      bld->static_sampler_state->wrap_t,
                                      &y_offset0, &y_offset1,
                                      &y_subcoord[0], &y_subcoord[1]);

      /* fold the two y offsets into the cube */
      for (z = 0; z < 2; z++) {
         for (x = 0; x < 2; x++) {
            offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][0][x], y_offset0);
            offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][1][x], y_offset1);
         }
      }
   }

   if (dims >= 3) {
      lp_build_sample_wrap_linear_int(bld,
                                      1, /* block length (depth) */
                                      r_ipart, &r_fpart, r_float,
                                      depth_vec, z_stride, offsets[2],
                                      bld->static_texture_state->pot_depth,
                                      bld->static_sampler_state->wrap_r,
                                      &z_offset0, &z_offset1,
                                      &z_subcoord[0], &z_subcoord[1]);
      /* fold the two z (slice) offsets into the cube */
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[0][y][x], z_offset0);
            offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[1][y][x], z_offset1);
         }
      }
   }

   /* fetch the 2/4/8 neighbor texels and lerp them with the 8.8 weights */
   lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
                                      x_subcoord, y_subcoord,
                                      s_fpart, t_fpart, r_fpart,
                                      colors);
}
1166
1167
/**
 * Sample a single texture image with (bi-)(tri-)linear sampling.
 * Like lp_build_sample_image_linear, but does the coordinate wrapping
 * with floats instead of ints (only the final offset calc is integer).
 * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
 * The filtered color is returned in *colors via
 * lp_build_sample_fetch_image_linear (see there for the packed format).
 *
 * \param int_size        integer vector with the level's width/height/depth
 * \param row_stride_vec  row stride in bytes (NULL for 1D)
 * \param img_stride_vec  image/slice stride in bytes (NULL for non-3D/array)
 * \param data_ptr        base pointer of the mip level (or of the whole
 *                        mip tree when mipoffsets is non-NULL)
 * \param mipoffsets      per-pixel byte offsets to the mip level, or NULL
 * \param offsets         optional texel offsets, offsets[0] NULL means none
 * \param colors          out: packed filtered texels
 */
static void
lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld,
                                    LLVMValueRef int_size,
                                    LLVMValueRef row_stride_vec,
                                    LLVMValueRef img_stride_vec,
                                    LLVMValueRef data_ptr,
                                    LLVMValueRef mipoffsets,
                                    LLVMValueRef s,
                                    LLVMValueRef t,
                                    LLVMValueRef r,
                                    const LLVMValueRef *offsets,
                                    LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_fpart;
   LLVMValueRef t_fpart = NULL;
   LLVMValueRef r_fpart = NULL;
   LLVMValueRef x_stride, y_stride, z_stride;
   LLVMValueRef x_offset0, x_offset1;
   LLVMValueRef y_offset0, y_offset1;
   LLVMValueRef z_offset0, z_offset1;
   LLVMValueRef offset[2][2][2]; /* [z][y][x] */
   LLVMValueRef x_subcoord[2], y_subcoord[2];
   LLVMValueRef flt_size;
   LLVMValueRef x_icoord0, x_icoord1;
   LLVMValueRef y_icoord0, y_icoord1;
   LLVMValueRef z_icoord0, z_icoord1;
   unsigned x, y, z;

   flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* do texcoord wrapping and compute texel offsets -- all-float path,
    * produces integer neighbor coords plus the lerp weight per axis */
   lp_build_sample_wrap_linear_float(bld,
                                     bld->format_desc->block.width,
                                     s, width_vec, offsets[0],
                                     bld->static_texture_state->pot_width,
                                     bld->static_sampler_state->wrap_s,
                                     &x_icoord0, &x_icoord1,
                                     &s_fpart,
                                     bld->static_sampler_state->force_nearest_s);

   if (dims >= 2) {
      lp_build_sample_wrap_linear_float(bld,
                                        bld->format_desc->block.height,
                                        t, height_vec, offsets[1],
                                        bld->static_texture_state->pot_height,
                                        bld->static_sampler_state->wrap_t,
                                        &y_icoord0, &y_icoord1,
                                        &t_fpart,
                                        bld->static_sampler_state->force_nearest_t);

      if (dims >= 3) {
         lp_build_sample_wrap_linear_float(bld,
                                           1, /* block length (depth) */
                                           r, depth_vec, offsets[2],
                                           bld->static_texture_state->pot_depth,
                                           bld->static_sampler_state->wrap_r,
                                           &z_icoord0, &z_icoord1,
                                           &r_fpart, 0);
      }
   }

   /*
    * From here on we deal with ints, and we should split up the 256bit
    * vectors manually for better generated code.
    */

   /* get pixel, row and image strides */
   x_stride = lp_build_const_vec(bld->gallivm,
                                 bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);
   y_stride = row_stride_vec;
   z_stride = img_stride_vec;

   /*
    * compute texel offset -
    * cannot do offset calc with floats, difficult for block-based formats,
    * and not enough precision anyway.
    */
   lp_build_sample_partial_offset(&bld->int_coord_bld,
                                  bld->format_desc->block.width,
                                  x_icoord0, x_stride,
                                  &x_offset0, &x_subcoord[0]);
   lp_build_sample_partial_offset(&bld->int_coord_bld,
                                  bld->format_desc->block.width,
                                  x_icoord1, x_stride,
                                  &x_offset1, &x_subcoord[1]);

   /* add potential cube/array/mip offsets now as they are constant per pixel */
   if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
      LLVMValueRef z_offset;
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      /* The r coord is the cube face in [0,5] or array layer */
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
   }
   if (mipoffsets) {
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
   }

   /* seed the 2x2x2 offset cube with the two x offsets */
   for (z = 0; z < 2; z++) {
      for (y = 0; y < 2; y++) {
         offset[z][y][0] = x_offset0;
         offset[z][y][1] = x_offset1;
      }
   }

   if (dims >= 2) {
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     bld->format_desc->block.height,
                                     y_icoord0, y_stride,
                                     &y_offset0, &y_subcoord[0]);
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     bld->format_desc->block.height,
                                     y_icoord1, y_stride,
                                     &y_offset1, &y_subcoord[1]);
      /* fold the two y offsets into the cube */
      for (z = 0; z < 2; z++) {
         for (x = 0; x < 2; x++) {
            offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][0][x], y_offset0);
            offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][1][x], y_offset1);
         }
      }
   }

   if (dims >= 3) {
      /* depth is never block-compressed, so the sub-block coord is unused */
      LLVMValueRef z_subcoord[2];
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     1,
                                     z_icoord0, z_stride,
                                     &z_offset0, &z_subcoord[0]);
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     1,
                                     z_icoord1, z_stride,
                                     &z_offset1, &z_subcoord[1]);
      /* fold the two z (slice) offsets into the cube */
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[0][y][x], z_offset0);
            offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[1][y][x], z_offset1);
         }
      }
   }

   /* fetch the neighbor texels and lerp them with the per-axis weights */
   lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
                                      x_subcoord, y_subcoord,
                                      s_fpart, t_fpart, r_fpart,
                                      colors);
}
1337
1338
/**
 * Sample the texture/mipmap using given image filter and mip filter.
 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
 * from (sizes and data pointers are looked up here); if we're using
 * nearest miplevel sampling ilevel1/lod_fpart will be null/unused.
 *
 * The result is written through colors_var (an alloca'd variable, so
 * the conditional second-level fetch can overwrite it inside the
 * generated if-block).
 *
 * \param img_filter  PIPE_TEX_FILTER_NEAREST or PIPE_TEX_FILTER_LINEAR
 * \param mip_filter  PIPE_TEX_MIPFILTER_x
 * \param lod_fpart   float fraction between the two levels (only used
 *                    for PIPE_TEX_MIPFILTER_LINEAR)
 */
static void
lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                       unsigned img_filter,
                       unsigned mip_filter,
                       LLVMValueRef s,
                       LLVMValueRef t,
                       LLVMValueRef r,
                       const LLVMValueRef *offsets,
                       LLVMValueRef ilevel0,
                       LLVMValueRef ilevel1,
                       LLVMValueRef lod_fpart,
                       LLVMValueRef colors_var)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef size0;
   LLVMValueRef size1;
   LLVMValueRef row_stride0_vec = NULL;
   LLVMValueRef row_stride1_vec = NULL;
   LLVMValueRef img_stride0_vec = NULL;
   LLVMValueRef img_stride1_vec = NULL;
   LLVMValueRef data_ptr0;
   LLVMValueRef data_ptr1;
   LLVMValueRef mipoff0 = NULL;
   LLVMValueRef mipoff1 = NULL;
   LLVMValueRef colors0;
   LLVMValueRef colors1;

   /* sample the first mipmap level */
   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
   if (bld->num_lods == 1) {
      /* single lod: can use the level's base pointer directly */
      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   }
   else {
      /* This path should work for num_lods 1 too but slightly less efficient */
      data_ptr0 = bld->base_ptr;
      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
   }

   /* pick the float-address AVX variants for wide vectors */
   if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
      if (img_filter == PIPE_TEX_FILTER_NEAREST) {
         lp_build_sample_image_nearest_afloat(bld,
                                              size0,
                                              row_stride0_vec, img_stride0_vec,
                                              data_ptr0, mipoff0, s, t, r, offsets,
                                              &colors0);
      }
      else {
         assert(img_filter == PIPE_TEX_FILTER_LINEAR);
         lp_build_sample_image_linear_afloat(bld,
                                             size0,
                                             row_stride0_vec, img_stride0_vec,
                                             data_ptr0, mipoff0, s, t, r, offsets,
                                             &colors0);
      }
   }
   else {
      if (img_filter == PIPE_TEX_FILTER_NEAREST) {
         lp_build_sample_image_nearest(bld,
                                       size0,
                                       row_stride0_vec, img_stride0_vec,
                                       data_ptr0, mipoff0, s, t, r, offsets,
                                       &colors0);
      }
      else {
         assert(img_filter == PIPE_TEX_FILTER_LINEAR);
         lp_build_sample_image_linear(bld,
                                      size0,
                                      row_stride0_vec, img_stride0_vec,
                                      data_ptr0, mipoff0, s, t, r, offsets,
                                      &colors0);
      }
   }

   /* Store the first level's colors in the output variables */
   LLVMBuildStore(builder, colors0, colors_var);

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      /* convert lod_fpart to 8.8 fixed point so it can be used as a
       * prescaled lerp weight below */
      LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
                                                     bld->levelf_bld.type, 256.0);
      LLVMTypeRef i32vec_type = bld->leveli_bld.vec_type;
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;
      unsigned num_quads = bld->coord_bld.type.length / 4;
      unsigned i;

      lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16vec_scale, "");
      lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16");

      /* need_lerp = lod_fpart > 0 */
      if (bld->num_lods == 1) {
         need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
                                   lod_fpart, bld->leveli_bld.zero,
                                   "need_lerp");
      }
      else {
         /*
          * We'll do mip filtering if any of the quads need it.
          * It might be better to split the vectors here and only fetch/filter
          * quads which need it.
          */
         /*
          * We need to clamp lod_fpart here since we can get negative
          * values which would screw up filtering if not all
          * lod_fpart values have same sign.
          * We can however then skip the greater than comparison.
          */
         lod_fpart = lp_build_max(&bld->leveli_bld, lod_fpart,
                                  bld->leveli_bld.zero);
         need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, lod_fpart);
      }

      /* only fetch/filter the second level when some lane needs it */
      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
      {
         struct lp_build_context u8n_bld;

         lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));

         /* sample the second mipmap level */
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &size1,
                                     &row_stride1_vec, &img_stride1_vec);
         if (bld->num_lods == 1) {
            data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
         }
         else {
            data_ptr1 = bld->base_ptr;
            mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
         }

         if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
            if (img_filter == PIPE_TEX_FILTER_NEAREST) {
               lp_build_sample_image_nearest_afloat(bld,
                                                    size1,
                                                    row_stride1_vec, img_stride1_vec,
                                                    data_ptr1, mipoff1, s, t, r, offsets,
                                                    &colors1);
            }
            else {
               lp_build_sample_image_linear_afloat(bld,
                                                   size1,
                                                   row_stride1_vec, img_stride1_vec,
                                                   data_ptr1, mipoff1, s, t, r, offsets,
                                                   &colors1);
            }
         }
         else {
            if (img_filter == PIPE_TEX_FILTER_NEAREST) {
               lp_build_sample_image_nearest(bld,
                                             size1,
                                             row_stride1_vec, img_stride1_vec,
                                             data_ptr1, mipoff1, s, t, r, offsets,
                                             &colors1);
            }
            else {
               lp_build_sample_image_linear(bld,
                                            size1,
                                            row_stride1_vec, img_stride1_vec,
                                            data_ptr1, mipoff1, s, t, r, offsets,
                                            &colors1);
            }
         }

         /* interpolate samples from the two mipmap levels */

         if (num_quads == 1 && bld->num_lods == 1) {
            /* one weight for everything: truncate to 8 bits and splat */
            lod_fpart = LLVMBuildTrunc(builder, lod_fpart, u8n_bld.elem_type, "");
            lod_fpart = lp_build_broadcast_scalar(&u8n_bld, lod_fpart);

#if HAVE_LLVM == 0x208
            /* This was a work-around for a bug in LLVM 2.8.
             * Evidently, something goes wrong in the construction of the
             * lod_fpart short[8] vector. Adding this no-effect shuffle seems
             * to force the vector to be properly constructed.
             * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
             */
#error Unsupported
#endif
         }
         else {
            unsigned num_chans_per_lod = 4 * bld->coord_type.length / bld->num_lods;
            LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->leveli_bld.type.length);
            LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];

            /* Take the LSB of lod_fpart */
            lod_fpart = LLVMBuildTrunc(builder, lod_fpart, tmp_vec_type, "");

            /* Broadcast each lod weight into their respective channels */
            for (i = 0; i < u8n_bld.type.length; ++i) {
               shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_lod);
            }
            lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, LLVMGetUndef(tmp_vec_type),
                                               LLVMConstVector(shuffle, u8n_bld.type.length), "");
         }

         colors0 = lp_build_lerp(&u8n_bld, lod_fpart,
                                 colors0, colors1,
                                 LP_BLD_LERP_PRESCALED_WEIGHTS);

         LLVMBuildStore(builder, colors0, colors_var);
      }
      lp_build_endif(&if_ctx);
   }
}
1550
1551
1552
/**
 * Texture sampling in AoS format. Used when sampling common 32-bit/texel
 * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes
 * but only limited texture coord wrap modes.
 *
 * Emits the packed fetch/filter code (via lp_build_sample_mipmap) and
 * then unpacks/swizzles the packed rgba8 result into the four SoA
 * channel vectors written to texel_out[4].
 *
 * \param lod_ipart  integer part of the lod, used to pick min vs mag filter
 * \param lod_fpart  fractional lod for linear mip filtering
 * \param ilevel0/1  the two mip levels to sample from
 */
void
lp_build_sample_aos(struct lp_build_sample_context *bld,
                    unsigned sampler_unit,
                    LLVMValueRef s,
                    LLVMValueRef t,
                    LLVMValueRef r,
                    const LLVMValueRef *offsets,
                    LLVMValueRef lod_ipart,
                    LLVMValueRef lod_fpart,
                    LLVMValueRef ilevel0,
                    LLVMValueRef ilevel1,
                    LLVMValueRef texel_out[4])
{
   struct lp_build_context *int_bld = &bld->int_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
   const unsigned min_filter = bld->static_sampler_state->min_img_filter;
   const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
   const unsigned dims = bld->dims;
   LLVMValueRef packed_var, packed;
   LLVMValueRef unswizzled[4];
   struct lp_build_context u8n_bld;

   /* we only support the common/simple wrap modes at this time */
   assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_s));
   if (dims >= 2)
      assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_t));
   if (dims >= 3)
      assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_r));


   /* make 8-bit unorm builder context */
   lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));

   /*
    * Get/interpolate texture colors.
    */

   /* variable (alloca) so both branches below can store into it */
   packed_var = lp_build_alloca(bld->gallivm, u8n_bld.vec_type, "packed_var");

   if (min_filter == mag_filter) {
      /* no need to distinguish between minification and magnification */
      lp_build_sample_mipmap(bld,
                             min_filter, mip_filter,
                             s, t, r, offsets,
                             ilevel0, ilevel1, lod_fpart,
                             packed_var);
   }
   else {
      /* Emit conditional to choose min image filter or mag image filter
       * depending on the lod being > 0 or <= 0, respectively.
       */
      struct lp_build_if_state if_ctx;
      LLVMValueRef minify;

      /*
       * XXX this should take all lods into account, if some are min
       * some max probably could hack up the coords/weights in the linear
       * path with selects to work for nearest.
       * If that's just two quads sitting next to each other it seems
       * quite ok to do the same filtering method on both though, at
       * least unless we have explicit lod (and who uses different
       * min/mag filter with that?)
       */
      if (bld->num_lods > 1)
         lod_ipart = LLVMBuildExtractElement(builder, lod_ipart,
                                             lp_build_const_int32(bld->gallivm, 0), "");

      /* minify = lod >= 0.0 */
      minify = LLVMBuildICmp(builder, LLVMIntSGE,
                             lod_ipart, int_bld->zero, "");

      lp_build_if(&if_ctx, bld->gallivm, minify);
      {
         /* Use the minification filter */
         lp_build_sample_mipmap(bld,
                                min_filter, mip_filter,
                                s, t, r, offsets,
                                ilevel0, ilevel1, lod_fpart,
                                packed_var);
      }
      lp_build_else(&if_ctx);
      {
         /* Use the magnification filter */
         lp_build_sample_mipmap(bld,
                                mag_filter, PIPE_TEX_MIPFILTER_NONE,
                                s, t, r, offsets,
                                ilevel0, NULL, NULL,
                                packed_var);
      }
      lp_build_endif(&if_ctx);
   }

   packed = LLVMBuildLoad(builder, packed_var, "");

   /*
    * Convert to SoA and swizzle.
    */
   lp_build_rgba8_to_fi32_soa(bld->gallivm,
                              bld->texel_type,
                              packed, unswizzled);

   if (util_format_is_rgba8_variant(bld->format_desc)) {
      /* apply the format's channel swizzle */
      lp_build_format_swizzle_soa(bld->format_desc,
                                  &bld->texel_bld,
                                  unswizzled, texel_out);
   }
   else {
      /* non-rgba8 formats were already swizzled at fetch time */
      texel_out[0] = unswizzled[0];
      texel_out[1] = unswizzled[1];
      texel_out[2] = unswizzled[2];
      texel_out[3] = unswizzled[3];
   }
}