/**************************************************************************
 *
 * Copyright 2010 VMware, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/**
 * Texture sampling -- AoS.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 * @author Brian Paul <brianp@vmware.com>
 */
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_debug.h"
#include "util/u_dump.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/format/u_format.h"
#include "util/u_cpu_detect.h"
#include "lp_bld_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_pack.h"
#include "lp_bld_flow.h"
#include "lp_bld_gather.h"
#include "lp_bld_format.h"
#include "lp_bld_init.h"
#include "lp_bld_sample.h"
#include "lp_bld_sample_aos.h"
#include "lp_bld_quad.h"
/**
 * Build LLVM code for texture coord wrapping, for nearest filtering,
 * for scaled integer texcoords.
 * \param block_length  is the length of the pixel block along the
 *                      coordinate axis
 * \param coord  the incoming texcoord (s,t or r) scaled to the texture size
 * \param coord_f  the incoming texcoord (s,t or r) as float vec
 * \param length  the texture size along one dimension
 * \param stride  pixel stride along the coordinate axis (in bytes)
 * \param offset  the texel offset along the coord axis
 * \param is_pot  if TRUE, length is a power of two
 * \param wrap_mode  one of PIPE_TEX_WRAP_x
 * \param out_offset  byte offset for the wrapped coordinate
 * \param out_i  resulting sub-block pixel coordinate for coord0
 */
static void
lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
                                 unsigned block_length,
                                 LLVMValueRef coord,
                                 LLVMValueRef coord_f,
                                 LLVMValueRef length,
                                 LLVMValueRef stride,
                                 LLVMValueRef offset,
                                 boolean is_pot,
                                 unsigned wrap_mode,
                                 LLVMValueRef *out_offset,
                                 LLVMValueRef *out_i)
{
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one;

   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);

   switch (wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot)
         coord = LLVMBuildAnd(builder, coord, length_minus_one, "");
      else {
         struct lp_build_context *coord_bld = &bld->coord_bld;
         LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length_f);
            coord_f = lp_build_add(coord_bld, coord_f, offset);
         }
         coord = lp_build_fract_safe(coord_bld, coord_f);
         coord = lp_build_mul(coord_bld, coord, length_f);
         coord = lp_build_itrunc(coord_bld, coord);
      }
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
      coord = lp_build_min(int_coord_bld, coord, length_minus_one);
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      assert(0);
      break;
   }

   lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
                                  out_offset, out_i);
}
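/*
 * Note on the REPEAT paths above: for a power-of-two size the wrap is a
 * single AND with (length - 1), e.g. a scaled coord of 9 on an 8-texel axis
 * becomes 9 & 7 = 1.  The non-power-of-two path works on the normalized
 * float coord instead, computing trunc(fract(coord_f) * length); e.g.
 * s = 1.125 on a 10-texel axis gives trunc(0.125 * 10) = 1.
 */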
/**
 * Helper to compute the first coord and the weight for
 * linear wrap repeat npot textures
 */
static void
lp_build_coord_repeat_npot_linear_int(struct lp_build_sample_context *bld,
                                      LLVMValueRef coord_f,
                                      LLVMValueRef length_i,
                                      LLVMValueRef length_f,
                                      LLVMValueRef *coord0_i,
                                      LLVMValueRef *weight_i)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   struct lp_build_context abs_coord_bld;
   struct lp_type abs_type;
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
                                                int_coord_bld->one);
   LLVMValueRef mask, i32_c8, i32_c128, i32_c255;

   /* wrap with normalized floats is just fract */
   coord_f = lp_build_fract(coord_bld, coord_f);
   coord_f = lp_build_mul(coord_bld, coord_f, length_f);
   /* convert to int, compute lerp weight */
   coord_f = lp_build_mul_imm(&bld->coord_bld, coord_f, 256);

   /* At this point we don't have any negative numbers so use non-signed
    * build context which might help on some archs.
    */
   abs_type = coord_bld->type;
   abs_type.sign = 0;
   lp_build_context_init(&abs_coord_bld, bld->gallivm, abs_type);
   *coord0_i = lp_build_iround(&abs_coord_bld, coord_f);

   /* subtract 0.5 (add -128) */
   i32_c128 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, -128);
   *coord0_i = LLVMBuildAdd(bld->gallivm->builder, *coord0_i, i32_c128, "");

   /* compute fractional part (AND with 0xff) */
   i32_c255 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 255);
   *weight_i = LLVMBuildAnd(bld->gallivm->builder, *coord0_i, i32_c255, "");

   /* compute floor (shift right 8) */
   i32_c8 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 8);
   *coord0_i = LLVMBuildAShr(bld->gallivm->builder, *coord0_i, i32_c8, "");

   /*
    * we avoided the 0.5/length division before the repeat wrap,
    * now need to fix up edge cases with selects
    */
   mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                           PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
   *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);

   /*
    * We should never get values too large - except if coord was nan or inf,
    * in which case things go terribly wrong...
    * Alternatively, could use fract_safe above...
    */
   *coord0_i = lp_build_min(int_coord_bld, *coord0_i, length_minus_one);
}
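/*
 * Worked example of the 8.8 fixed-point math above: for a normalized coord
 * of 0.3 on a 10-texel axis, fract(0.3) * 10 * 256 rounds to 768; adding
 * -128 (the -0.5 texel shift) gives 640, so the lerp weight is
 * 640 & 0xff = 128 (i.e. 0.5) and the first texel is 640 >> 8 = 2, which
 * matches floor(0.3 * 10 - 0.5) = 2.
 */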
/**
 * Build LLVM code for texture coord wrapping, for linear filtering,
 * for scaled integer texcoords.
 * \param block_length  is the length of the pixel block along the
 *                      coordinate axis
 * \param coord0  the incoming texcoord (s,t or r) scaled to the texture size
 * \param coord_f  the incoming texcoord (s,t or r) as float vec
 * \param length  the texture size along one dimension
 * \param stride  pixel stride along the coordinate axis (in bytes)
 * \param offset  the texel offset along the coord axis
 * \param is_pot  if TRUE, length is a power of two
 * \param wrap_mode  one of PIPE_TEX_WRAP_x
 * \param offset0  resulting relative offset for coord0
 * \param offset1  resulting relative offset for coord0 + 1
 * \param i0  resulting sub-block pixel coordinate for coord0
 * \param i1  resulting sub-block pixel coordinate for coord0 + 1
 */
static void
lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
                                unsigned block_length,
                                LLVMValueRef coord0,
                                LLVMValueRef *weight_i,
                                LLVMValueRef coord_f,
                                LLVMValueRef length,
                                LLVMValueRef stride,
                                LLVMValueRef offset,
                                boolean is_pot,
                                unsigned wrap_mode,
                                LLVMValueRef *offset0,
                                LLVMValueRef *offset1,
                                LLVMValueRef *i0,
                                LLVMValueRef *i1)
{
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one;
   LLVMValueRef lmask, umask, mask;

   /*
    * If the pixel block covers more than one pixel then there is no easy
    * way to calculate offset1 relative to offset0. Instead, compute them
    * independently. Otherwise, try to compute offset0 and offset1 with
    * a single stride multiplication.
    */

   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);

   if (block_length != 1) {
      LLVMValueRef coord1;

      switch (wrap_mode) {
      case PIPE_TEX_WRAP_REPEAT:
         if (is_pot) {
            coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
            coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
            coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
         }
         else {
            LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
            if (offset) {
               offset = lp_build_int_to_float(&bld->coord_bld, offset);
               offset = lp_build_div(&bld->coord_bld, offset, length_f);
               coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
            }
            lp_build_coord_repeat_npot_linear_int(bld, coord_f,
                                                  length, length_f,
                                                  &coord0, weight_i);
            mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
                                    PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
            coord1 = LLVMBuildAnd(builder,
                                  lp_build_add(int_coord_bld, coord0,
                                               int_coord_bld->one),
                                  mask, "");
         }
         break;

      case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         coord0 = lp_build_clamp(int_coord_bld, coord0, int_coord_bld->zero,
                                 length_minus_one);
         coord1 = lp_build_clamp(int_coord_bld, coord1, int_coord_bld->zero,
                                 length_minus_one);
         break;

      case PIPE_TEX_WRAP_CLAMP:
      case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      case PIPE_TEX_WRAP_MIRROR_REPEAT:
      case PIPE_TEX_WRAP_MIRROR_CLAMP:
      case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      default:
         assert(0);
         coord0 = int_coord_bld->zero;
         coord1 = int_coord_bld->zero;
         break;
      }
      lp_build_sample_partial_offset(int_coord_bld, block_length, coord0, stride,
                                     offset0, i0);
      lp_build_sample_partial_offset(int_coord_bld, block_length, coord1, stride,
                                     offset1, i1);
      return;
   }

   *i0 = int_coord_bld->zero;
   *i1 = int_coord_bld->zero;

   switch (wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
      }
      else {
         LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
         if (offset) {
            offset = lp_build_int_to_float(&bld->coord_bld, offset);
            offset = lp_build_div(&bld->coord_bld, offset, length_f);
            coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
         }
         lp_build_coord_repeat_npot_linear_int(bld, coord_f,
                                               length, length_f,
                                               &coord0, weight_i);
      }

      mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
                              PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);

      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
      *offset1 = LLVMBuildAnd(builder,
                              lp_build_add(int_coord_bld, *offset0, stride),
                              mask, "");
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      /* XXX this might be slower than the separate path
       * on some newer cpus. With sse41 this is 8 instructions vs. 7
       * - at least on SNB this is almost certainly slower since
       * min/max are cheaper than selects, and the muls aren't bad.
       */
      lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                               PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
      umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                               PIPE_FUNC_LESS, coord0, length_minus_one);

      coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
      coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);

      mask = LLVMBuildAnd(builder, lmask, umask, "");

      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
      *offset1 = lp_build_add(int_coord_bld,
                              *offset0,
                              LLVMBuildAnd(builder, stride, mask, ""));
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      assert(0);
      *offset0 = int_coord_bld->zero;
      *offset1 = int_coord_bld->zero;
      break;
   }
}
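/*
 * Note on the single-stride path above (block_length == 1): offset0 is a
 * plain byte offset, coord0 * stride, and offset1 is derived from it with a
 * masked add so that the last texel wraps for REPEAT.  E.g. with stride 4
 * and length 8, coord0 = 7 gives offset0 = 28; the NOTEQUAL mask is zero
 * there, so offset1 = (28 + 4) & 0 = 0, i.e. texel 0 as REPEAT requires.
 */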
/**
 * Fetch texels for image with nearest sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 */
static void
lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
                                    LLVMValueRef data_ptr,
                                    LLVMValueRef offset,
                                    LLVMValueRef x_subcoord,
                                    LLVMValueRef y_subcoord,
                                    LLVMValueRef *colors)
{
   /*
    * Fetch the pixels as 4 x 32bit (rgba order might differ):
    *
    *   rgba0 rgba1 rgba2 rgba3
    *
    * bit cast them into 16 x u8
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    *
    * unpack them into two 8 x i16:
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1
    *   r2 g2 b2 a2 r3 g3 b3 a3
    *
    * The higher 8 bits of the resulting elements will be zero.
    */
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef rgba8;
   struct lp_build_context u8n;
   LLVMTypeRef u8n_vec_type;
   struct lp_type fetch_type;

   lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
   u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);

   fetch_type = lp_type_uint(bld->texel_type.width);
   if (util_format_is_rgba8_variant(bld->format_desc)) {
      /*
       * Given the format is a rgba8, just read the pixels as is,
       * without any swizzling. Swizzling will be done later.
       */
      rgba8 = lp_build_gather(bld->gallivm,
                              bld->texel_type.length,
                              bld->format_desc->block.bits,
                              fetch_type, TRUE,
                              data_ptr, offset, TRUE);

      rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
   }
   else {
      rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
                                      bld->format_desc,
                                      u8n.type, TRUE,
                                      data_ptr, offset,
                                      x_subcoord, y_subcoord,
                                      bld->cache);
   }

   *colors = rgba8;
}
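/*
 * util_format_is_rgba8_variant() above is what gates the fast path: for the
 * plain four-times-8-bit formats (RGBA/BGRA-style orderings) the texels can
 * be gathered as raw 32-bit words and swizzled later, while every other
 * format goes through the generic lp_build_fetch_rgba_aos() conversion.
 */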
/**
 * Sample a single texture image with nearest sampling.
 * If sampling a cube texture, r = cube face in [0,5].
 * Return filtered color as two vectors of 16-bit fixed point values.
 */
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                              LLVMValueRef int_size,
                              LLVMValueRef row_stride_vec,
                              LLVMValueRef img_stride_vec,
                              LLVMValueRef data_ptr,
                              LLVMValueRef mipoffsets,
                              LLVMValueRef s,
                              LLVMValueRef t,
                              LLVMValueRef r,
                              const LLVMValueRef *offsets,
                              LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   struct lp_build_context i32;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL;
   LLVMValueRef s_float, t_float = NULL, r_float = NULL;
   LLVMValueRef x_stride;
   LLVMValueRef x_offset, offset;
   LLVMValueRef x_subcoord, y_subcoord, z_subcoord;

   lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));

   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                int_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   s_float = s; t_float = t; r_float = r;

   if (bld->static_sampler_state->normalized_coords) {
      LLVMValueRef flt_size;

      flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);

      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
   }

   /* convert float to int */
   /* For correct rounding, need floor, not truncation here.
    * Note that in some cases (clamp to edge, no texel offsets) we
    * could use a non-signed build context which would help archs
    * greatly which don't have arch rounding.
    */
   s_ipart = lp_build_ifloor(&bld->coord_bld, s);
   if (dims >= 2)
      t_ipart = lp_build_ifloor(&bld->coord_bld, t);
   if (dims >= 3)
      r_ipart = lp_build_ifloor(&bld->coord_bld, r);

   /* add texel offsets */
   if (offsets[0]) {
      s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
      if (dims >= 2 && offsets[1]) {
         t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
         if (dims >= 3 && offsets[2]) {
            r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
         }
      }
   }

   /* get pixel, row, image strides */
   x_stride = lp_build_const_vec(bld->gallivm,
                                 bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);

   /* Do texcoord wrapping, compute texel offset */
   lp_build_sample_wrap_nearest_int(bld,
                                    bld->format_desc->block.width,
                                    s_ipart, s_float,
                                    width_vec, x_stride, offsets[0],
                                    bld->static_texture_state->pot_width,
                                    bld->static_sampler_state->wrap_s,
                                    &x_offset, &x_subcoord);
   offset = x_offset;
   if (dims >= 2) {
      LLVMValueRef y_offset;
      lp_build_sample_wrap_nearest_int(bld,
                                       bld->format_desc->block.height,
                                       t_ipart, t_float,
                                       height_vec, row_stride_vec, offsets[1],
                                       bld->static_texture_state->pot_height,
                                       bld->static_sampler_state->wrap_t,
                                       &y_offset, &y_subcoord);
      offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
      if (dims >= 3) {
         LLVMValueRef z_offset;
         lp_build_sample_wrap_nearest_int(bld,
                                          1, /* block length (depth) */
                                          r_ipart, r_float,
                                          depth_vec, img_stride_vec, offsets[2],
                                          bld->static_texture_state->pot_depth,
                                          bld->static_sampler_state->wrap_r,
                                          &z_offset, &z_subcoord);
         offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
      }
   }
   if (has_layer_coord(bld->static_texture_state->target)) {
      LLVMValueRef z_offset;
      /* The r coord is the cube face in [0,5] or array layer */
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
   }
   if (mipoffsets) {
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
   }

   lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
                                       x_subcoord, y_subcoord,
                                       colors);
}
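/*
 * The nearest path above boils down to one byte offset per pixel: the
 * x offset (texel index times bytes per texel) plus the y offset (row times
 * row stride), plus slice, cube face/array layer and mip offsets where
 * present.  For example, texel (3, 5) of an RGBA8 level with a 256-byte row
 * stride ends up at 3 * 4 + 5 * 256 = 1292 bytes from the level start.
 */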
/**
 * Fetch texels for image with linear sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 */
static void
lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
                                   LLVMValueRef data_ptr,
                                   LLVMValueRef offset[2][2][2],
                                   LLVMValueRef x_subcoord[2],
                                   LLVMValueRef y_subcoord[2],
                                   LLVMValueRef s_fpart,
                                   LLVMValueRef t_fpart,
                                   LLVMValueRef r_fpart,
                                   LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context u8n;
   LLVMTypeRef u8n_vec_type;
   LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
   LLVMValueRef shuffle;
   LLVMValueRef neighbors[2][2][2]; /* [z][y][x] */
   LLVMValueRef packed;
   unsigned i, j, k;
   unsigned numj, numk;

   lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
   u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);

   /*
    * Transform 4 x i32 in
    *
    *   s_fpart = {s0, s1, s2, s3}
    *
    * where each value is between 0 and 0xff,
    *
    * into a vector of 16 x u8
    *
    *   s_fpart = {s0, s0, s0, s0, s1, s1, s1, s1, s2, s2, s2, s2, s3, s3, s3, s3}
    *
    * and likewise for t_fpart. There is no risk of losing precision here
    * since the fractional parts only use the lower 8 bits.
    */
   s_fpart = LLVMBuildBitCast(builder, s_fpart, u8n_vec_type, "");
   if (dims >= 2)
      t_fpart = LLVMBuildBitCast(builder, t_fpart, u8n_vec_type, "");
   if (dims >= 3)
      r_fpart = LLVMBuildBitCast(builder, r_fpart, u8n_vec_type, "");

   for (j = 0; j < u8n.type.length; j += 4) {
#if UTIL_ARCH_LITTLE_ENDIAN
      unsigned subindex = 0;
#else
      unsigned subindex = 3;
#endif
      LLVMValueRef index;

      index = LLVMConstInt(elem_type, j + subindex, 0);
      for (i = 0; i < 4; ++i)
         shuffles[j + i] = index;
   }

   shuffle = LLVMConstVector(shuffles, u8n.type.length);

   s_fpart = LLVMBuildShuffleVector(builder, s_fpart, u8n.undef,
                                    shuffle, "");
   if (dims >= 2) {
      t_fpart = LLVMBuildShuffleVector(builder, t_fpart, u8n.undef,
                                       shuffle, "");
   }
   if (dims >= 3) {
      r_fpart = LLVMBuildShuffleVector(builder, r_fpart, u8n.undef,
                                       shuffle, "");
   }

   /*
    * Fetch the pixels as 4 x 32bit (rgba order might differ):
    *
    *   rgba0 rgba1 rgba2 rgba3
    *
    * bit cast them into 16 x u8
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    *
    * unpack them into two 8 x i16:
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1
    *   r2 g2 b2 a2 r3 g3 b3 a3
    *
    * The higher 8 bits of the resulting elements will be zero.
    */
   numj = 1 + (dims >= 2);
   numk = 1 + (dims >= 3);

   for (k = 0; k < numk; k++) {
      for (j = 0; j < numj; j++) {
         for (i = 0; i < 2; i++) {
            LLVMValueRef rgba8;

            if (util_format_is_rgba8_variant(bld->format_desc)) {
               struct lp_type fetch_type;
               /*
                * Given the format is a rgba8, just read the pixels as is,
                * without any swizzling. Swizzling will be done later.
                */
               fetch_type = lp_type_uint(bld->texel_type.width);
               rgba8 = lp_build_gather(bld->gallivm,
                                       bld->texel_type.length,
                                       bld->format_desc->block.bits,
                                       fetch_type, TRUE,
                                       data_ptr, offset[k][j][i], TRUE);

               rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
            }
            else {
               rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
                                               bld->format_desc,
                                               u8n.type, TRUE,
                                               data_ptr, offset[k][j][i],
                                               x_subcoord[i],
                                               y_subcoord[j],
                                               bld->cache);
            }

            neighbors[k][j][i] = rgba8;
         }
      }
   }

   /*
    * Linear interpolation with 8.8 fixed point.
    */
   if (bld->static_sampler_state->force_nearest_s) {
      /* special case 1-D lerp */
      packed = lp_build_lerp(&u8n,
                             t_fpart,
                             neighbors[0][0][0],
                             neighbors[0][1][0],
                             LP_BLD_LERP_PRESCALED_WEIGHTS);
   }
   else if (bld->static_sampler_state->force_nearest_t) {
      /* special case 1-D lerp */
      packed = lp_build_lerp(&u8n,
                             s_fpart,
                             neighbors[0][0][0],
                             neighbors[0][0][1],
                             LP_BLD_LERP_PRESCALED_WEIGHTS);
   }
   else {
      /* general 1/2/3-D lerping */
      if (dims == 1) {
         packed = lp_build_lerp(&u8n,
                                s_fpart,
                                neighbors[0][0][0],
                                neighbors[0][0][1],
                                LP_BLD_LERP_PRESCALED_WEIGHTS);
      } else if (dims == 2) {
         /* 2-D lerp */
         packed = lp_build_lerp_2d(&u8n,
                                   s_fpart, t_fpart,
                                   neighbors[0][0][0],
                                   neighbors[0][0][1],
                                   neighbors[0][1][0],
                                   neighbors[0][1][1],
                                   LP_BLD_LERP_PRESCALED_WEIGHTS);
      } else {
         /* 3-D lerp */
         assert(dims == 3);
         packed = lp_build_lerp_3d(&u8n,
                                   s_fpart, t_fpart, r_fpart,
                                   neighbors[0][0][0],
                                   neighbors[0][0][1],
                                   neighbors[0][1][0],
                                   neighbors[0][1][1],
                                   neighbors[1][0][0],
                                   neighbors[1][0][1],
                                   neighbors[1][1][0],
                                   neighbors[1][1][1],
                                   LP_BLD_LERP_PRESCALED_WEIGHTS);
      }
   }

   *colors = packed;
}
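/*
 * The shuffles above replicate each 32-bit lerp weight across the four
 * bytes of its pixel, picking byte lane j + 0 on little-endian and j + 3 on
 * big-endian hosts (where the low 8 bits of the i32 weight live), so that
 * lp_build_lerp() can blend all four color channels of a pixel with the
 * same 8-bit fixed-point weight.
 */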
/**
 * Sample a single texture image with (bi-)(tri-)linear sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 */
static void
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                             LLVMValueRef int_size,
                             LLVMValueRef row_stride_vec,
                             LLVMValueRef img_stride_vec,
                             LLVMValueRef data_ptr,
                             LLVMValueRef mipoffsets,
                             LLVMValueRef s,
                             LLVMValueRef t,
                             LLVMValueRef r,
                             const LLVMValueRef *offsets,
                             LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context i32;
   LLVMValueRef i32_c8, i32_c128, i32_c255;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_ipart, s_fpart, s_float;
   LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_float = NULL;
   LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_float = NULL;
   LLVMValueRef x_stride, y_stride, z_stride;
   LLVMValueRef x_offset0, x_offset1;
   LLVMValueRef y_offset0, y_offset1;
   LLVMValueRef z_offset0, z_offset1;
   LLVMValueRef offset[2][2][2]; /* [z][y][x] */
   LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
   unsigned x, y, z;

   lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));

   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                int_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   s_float = s; t_float = t; r_float = r;

   if (bld->static_sampler_state->normalized_coords) {
      LLVMValueRef scaled_size;
      LLVMValueRef flt_size;

      /* scale size by 256 (8 fractional bits) */
      scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);

      flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);

      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
   }
   else {
      /* scale coords by 256 (8 fractional bits) */
      s = lp_build_mul_imm(&bld->coord_bld, s, 256);
      if (dims >= 2)
         t = lp_build_mul_imm(&bld->coord_bld, t, 256);
      if (dims >= 3)
         r = lp_build_mul_imm(&bld->coord_bld, r, 256);
   }

   /* convert float to int */
   /* For correct rounding, need round to nearest, not truncation here.
    * Note that in some cases (clamp to edge, no texel offsets) we
    * could use a non-signed build context which would help archs which
    * don't have fptosi intrinsic with nearest rounding implemented.
    */
   s = lp_build_iround(&bld->coord_bld, s);
   if (dims >= 2)
      t = lp_build_iround(&bld->coord_bld, t);
   if (dims >= 3)
      r = lp_build_iround(&bld->coord_bld, r);

   /* subtract 0.5 (add -128) */
   i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
   if (!bld->static_sampler_state->force_nearest_s) {
      s = LLVMBuildAdd(builder, s, i32_c128, "");
   }
   if (dims >= 2 && !bld->static_sampler_state->force_nearest_t) {
      t = LLVMBuildAdd(builder, t, i32_c128, "");
   }
   if (dims >= 3) {
      r = LLVMBuildAdd(builder, r, i32_c128, "");
   }

   /* compute floor (shift right 8) */
   i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   if (dims >= 2)
      t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
   if (dims >= 3)
      r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");

   /* add texel offsets */
   if (offsets[0]) {
      s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
      if (dims >= 2 && offsets[1]) {
         t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
         if (dims >= 3 && offsets[2]) {
            r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
         }
      }
   }

   /* compute fractional part (AND with 0xff) */
   i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
   s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
   if (dims >= 2)
      t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
   if (dims >= 3)
      r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");

   /* get pixel, row and image strides */
   x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);
   y_stride = row_stride_vec;
   z_stride = img_stride_vec;

   /* do texcoord wrapping and compute texel offsets */
   lp_build_sample_wrap_linear_int(bld,
                                   bld->format_desc->block.width,
                                   s_ipart, &s_fpart, s_float,
                                   width_vec, x_stride, offsets[0],
                                   bld->static_texture_state->pot_width,
                                   bld->static_sampler_state->wrap_s,
                                   &x_offset0, &x_offset1,
                                   &x_subcoord[0], &x_subcoord[1]);

   /* add potential cube/array/mip offsets now as they are constant per pixel */
   if (has_layer_coord(bld->static_texture_state->target)) {
      LLVMValueRef z_offset;
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      /* The r coord is the cube face in [0,5] or array layer */
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
   }
   if (mipoffsets) {
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
   }

   for (z = 0; z < 2; z++) {
      for (y = 0; y < 2; y++) {
         offset[z][y][0] = x_offset0;
         offset[z][y][1] = x_offset1;
      }
   }

   if (dims >= 2) {
      lp_build_sample_wrap_linear_int(bld,
                                      bld->format_desc->block.height,
                                      t_ipart, &t_fpart, t_float,
                                      height_vec, y_stride, offsets[1],
                                      bld->static_texture_state->pot_height,
                                      bld->static_sampler_state->wrap_t,
                                      &y_offset0, &y_offset1,
                                      &y_subcoord[0], &y_subcoord[1]);

      for (z = 0; z < 2; z++) {
         for (x = 0; x < 2; x++) {
            offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][0][x], y_offset0);
            offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][1][x], y_offset1);
         }
      }

      if (dims >= 3) {
         lp_build_sample_wrap_linear_int(bld,
                                         1, /* block length (depth) */
                                         r_ipart, &r_fpart, r_float,
                                         depth_vec, z_stride, offsets[2],
                                         bld->static_texture_state->pot_depth,
                                         bld->static_sampler_state->wrap_r,
                                         &z_offset0, &z_offset1,
                                         &z_subcoord[0], &z_subcoord[1]);
         for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
               offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                              offset[0][y][x], z_offset0);
               offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
                                              offset[1][y][x], z_offset1);
            }
         }
      }
   }

   lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
                                      x_subcoord, y_subcoord,
                                      s_fpart, t_fpart, r_fpart,
                                      colors);
}
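/*
 * The offset[z][y][x] table above is filled incrementally: the two x
 * offsets are written into every slot first, then y_offset0/y_offset1 are
 * added to the y = 0 / y = 1 rows, then z_offset0/z_offset1 to the z = 0 /
 * z = 1 slices.  Each of the (up to) eight entries thus ends up as the sum
 * of an x, y and z offset, i.e. the byte address of one corner texel of the
 * bilinear/trilinear footprint.
 */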
/**
 * Sample the texture/mipmap using given image filter and mip filter.
 * data0_ptr and data1_ptr point to the two mipmap levels to sample
 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
 * If we're using nearest miplevel sampling the '1' values will be null/unused.
 */
static void
lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                       unsigned img_filter,
                       unsigned mip_filter,
                       LLVMValueRef s,
                       LLVMValueRef t,
                       LLVMValueRef r,
                       const LLVMValueRef *offsets,
                       LLVMValueRef ilevel0,
                       LLVMValueRef ilevel1,
                       LLVMValueRef lod_fpart,
                       LLVMValueRef colors_var)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef size0;
   LLVMValueRef size1;
   LLVMValueRef row_stride0_vec = NULL;
   LLVMValueRef row_stride1_vec = NULL;
   LLVMValueRef img_stride0_vec = NULL;
   LLVMValueRef img_stride1_vec = NULL;
   LLVMValueRef data_ptr0;
   LLVMValueRef data_ptr1;
   LLVMValueRef mipoff0 = NULL;
   LLVMValueRef mipoff1 = NULL;
   LLVMValueRef colors0;
   LLVMValueRef colors1;

   /* sample the first mipmap level */
   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
   if (bld->num_mips == 1) {
      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   }
   else {
      /* This path should work for num_lods 1 too but slightly less efficient */
      data_ptr0 = bld->base_ptr;
      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
   }

   if (img_filter == PIPE_TEX_FILTER_NEAREST) {
      lp_build_sample_image_nearest(bld,
                                    size0,
                                    row_stride0_vec, img_stride0_vec,
                                    data_ptr0, mipoff0, s, t, r, offsets,
                                    &colors0);
   }
   else {
      assert(img_filter == PIPE_TEX_FILTER_LINEAR);
      lp_build_sample_image_linear(bld,
                                   size0,
                                   row_stride0_vec, img_stride0_vec,
                                   data_ptr0, mipoff0, s, t, r, offsets,
                                   &colors0);
   }

   /* Store the first level's colors in the output variables */
   LLVMBuildStore(builder, colors0, colors_var);

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
                                                     bld->lodf_bld.type, 256.0);
      LLVMTypeRef i32vec_type = bld->lodi_bld.vec_type;
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;
      unsigned num_quads = bld->coord_bld.type.length / 4;
      unsigned i;

      lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16vec_scale, "");
      lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16");

      /* need_lerp = lod_fpart > 0 */
      if (bld->num_lods == 1) {
         need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
                                   lod_fpart, bld->lodi_bld.zero,
                                   "need_lerp");
      }
      else {
         /*
          * We'll do mip filtering if any of the quads need it.
          * It might be better to split the vectors here and only fetch/filter
          * quads which need it.
          *
          * We need to clamp lod_fpart here since we can get negative
          * values which would screw up filtering if not all
          * lod_fpart values have same sign.
          * We can however then skip the greater than comparison.
          */
         lod_fpart = lp_build_max(&bld->lodi_bld, lod_fpart,
                                  bld->lodi_bld.zero);
         need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_fpart);
      }

      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
      {
         struct lp_build_context u8n_bld;

         lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));

         /* sample the second mipmap level */
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &size1,
                                     &row_stride1_vec, &img_stride1_vec);
         if (bld->num_mips == 1) {
            data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
         }
         else {
            data_ptr1 = bld->base_ptr;
            mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
         }

         if (img_filter == PIPE_TEX_FILTER_NEAREST) {
            lp_build_sample_image_nearest(bld,
                                          size1,
                                          row_stride1_vec, img_stride1_vec,
                                          data_ptr1, mipoff1, s, t, r, offsets,
                                          &colors1);
         }
         else {
            lp_build_sample_image_linear(bld,
                                         size1,
                                         row_stride1_vec, img_stride1_vec,
                                         data_ptr1, mipoff1, s, t, r, offsets,
                                         &colors1);
         }

         /* interpolate samples from the two mipmap levels */

         if (num_quads == 1 && bld->num_lods == 1) {
            lod_fpart = LLVMBuildTrunc(builder, lod_fpart, u8n_bld.elem_type, "");
            lod_fpart = lp_build_broadcast_scalar(&u8n_bld, lod_fpart);
         }
         else {
            unsigned num_chans_per_lod = 4 * bld->coord_type.length / bld->num_lods;
            LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->lodi_bld.type.length);
            LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];

            /* Take the LSB of lod_fpart */
            lod_fpart = LLVMBuildTrunc(builder, lod_fpart, tmp_vec_type, "");

            /* Broadcast each lod weight into their respective channels */
            for (i = 0; i < u8n_bld.type.length; ++i) {
               shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_lod);
            }
            lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, LLVMGetUndef(tmp_vec_type),
                                               LLVMConstVector(shuffle, u8n_bld.type.length), "");
         }

         colors0 = lp_build_lerp(&u8n_bld, lod_fpart,
                                 colors0, colors1,
                                 LP_BLD_LERP_PRESCALED_WEIGHTS);

         LLVMBuildStore(builder, colors0, colors_var);
      }
      lp_build_endif(&if_ctx);
   }
}
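/*
 * lod_fpart handling above: the fractional mip weight is converted to the
 * same 8-bit fixed-point scale as the texel weights (multiplied by 256.0
 * and truncated to the low byte), so e.g. a fraction of 0.25 becomes the
 * weight 64 that lp_build_lerp() uses to blend the two mip levels.
 */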
/**
 * Texture sampling in AoS format. Used when sampling common 32-bit/texel
 * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes
 * but only limited texture coord wrap modes.
 */
void
lp_build_sample_aos(struct lp_build_sample_context *bld,
                    unsigned sampler_unit,
                    LLVMValueRef s,
                    LLVMValueRef t,
                    LLVMValueRef r,
                    const LLVMValueRef *offsets,
                    LLVMValueRef lod_positive,
                    LLVMValueRef lod_fpart,
                    LLVMValueRef ilevel0,
                    LLVMValueRef ilevel1,
                    LLVMValueRef texel_out[4])
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
   const unsigned min_filter = bld->static_sampler_state->min_img_filter;
   const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
   const unsigned dims = bld->dims;
   LLVMValueRef packed_var, packed;
   LLVMValueRef unswizzled[4];
   struct lp_build_context u8n_bld;

   /* we only support the common/simple wrap modes at this time */
   assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_s));
   if (dims >= 2)
      assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_t));
   if (dims >= 3)
      assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_r));

   /* make 8-bit unorm builder context */
   lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));

   /*
    * Get/interpolate texture colors.
    */

   packed_var = lp_build_alloca(bld->gallivm, u8n_bld.vec_type, "packed_var");

   if (min_filter == mag_filter) {
      /* no need to distinguish between minification and magnification */
      lp_build_sample_mipmap(bld,
                             min_filter, mip_filter,
                             s, t, r, offsets,
                             ilevel0, ilevel1, lod_fpart,
                             packed_var);
   }
   else {
      /* Emit conditional to choose min image filter or mag image filter
       * depending on the lod being > 0 or <= 0, respectively.
       */
      struct lp_build_if_state if_ctx;

      /*
       * FIXME this should take all lods into account, if some are min
       * some max probably could hack up the weights in the linear
       * path with selects to work for nearest.
       */
      if (bld->num_lods > 1)
         lod_positive = LLVMBuildExtractElement(builder, lod_positive,
                                                lp_build_const_int32(bld->gallivm, 0), "");

      lod_positive = LLVMBuildTrunc(builder, lod_positive,
                                    LLVMInt1TypeInContext(bld->gallivm->context), "");

      lp_build_if(&if_ctx, bld->gallivm, lod_positive);
      {
         /* Use the minification filter */
         lp_build_sample_mipmap(bld,
                                min_filter, mip_filter,
                                s, t, r, offsets,
                                ilevel0, ilevel1, lod_fpart,
                                packed_var);
      }
      lp_build_else(&if_ctx);
      {
         /* Use the magnification filter */
         lp_build_sample_mipmap(bld,
                                mag_filter, PIPE_TEX_MIPFILTER_NONE,
                                s, t, r, offsets,
                                ilevel0, NULL, NULL,
                                packed_var);
      }
      lp_build_endif(&if_ctx);
   }

   packed = LLVMBuildLoad(builder, packed_var, "");

   /*
    * Convert to SoA and swizzle.
    */
   lp_build_rgba8_to_fi32_soa(bld->gallivm,
                              bld->texel_type,
                              packed, unswizzled);

   if (util_format_is_rgba8_variant(bld->format_desc)) {
      lp_build_format_swizzle_soa(bld->format_desc,
                                  &bld->texel_bld,
                                  unswizzled, texel_out);
   }
   else {
      texel_out[0] = unswizzled[0];
      texel_out[1] = unswizzled[1];
      texel_out[2] = unswizzled[2];
      texel_out[3] = unswizzled[3];
   }
}