src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * Texture sampling -- SoA.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  * @author Brian Paul <brianp@vmware.com>
  34  */
  35
  36 #include "pipe/p_defines.h"
  37 #include "pipe/p_state.h"
  38 #include "pipe/p_shader_tokens.h"
  39 #include "util/u_debug.h"
  40 #include "util/u_dump.h"
  41 #include "util/u_memory.h"
  42 #include "util/u_math.h"
  43 #include "util/u_format.h"
  44 #include "util/u_cpu_detect.h"
  45 #include "lp_bld_debug.h"
  46 #include "lp_bld_type.h"
  47 #include "lp_bld_const.h"
  48 #include "lp_bld_conv.h"
  49 #include "lp_bld_arit.h"
  50 #include "lp_bld_bitarit.h"
  51 #include "lp_bld_logic.h"
  52 #include "lp_bld_printf.h"
  53 #include "lp_bld_swizzle.h"
  54 #include "lp_bld_flow.h"
  55 #include "lp_bld_gather.h"
  56 #include "lp_bld_format.h"
  57 #include "lp_bld_sample.h"
  58 #include "lp_bld_sample_aos.h"
  59 #include "lp_bld_struct.h"
  60 #include "lp_bld_quad.h"
  61 #include "lp_bld_pack.h"
  62
  63
  64 /**
  65  * Generate code to fetch a texel from a texture at int coords (x, y, z).
  66  * The computation depends on whether the texture is 1D, 2D or 3D.
  67  * The result, texel, will be float vectors:
  68  *   texel[0] = red values
  69  *   texel[1] = green values
  70  *   texel[2] = blue values
  71  *   texel[3] = alpha values
  72  */
  73 static void
  74 lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
  75                           unsigned sampler_unit,
  76                           LLVMValueRef width,
  77                           LLVMValueRef height,
  78                           LLVMValueRef depth,
  79                           LLVMValueRef x,
  80                           LLVMValueRef y,
  81                           LLVMValueRef z,
  82                           LLVMValueRef y_stride,
  83                           LLVMValueRef z_stride,
  84                           LLVMValueRef data_ptr,
  85                           LLVMValueRef mipoffsets,
  86                           LLVMValueRef texel_out[4])
  87 {
  88    const struct lp_static_sampler_state *static_state = bld->static_sampler_state;
  89    const unsigned dims = bld->dims;
  90    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
  91    LLVMBuilderRef builder = bld->gallivm->builder;
  92    LLVMValueRef offset;
  93    LLVMValueRef i, j;
  94    LLVMValueRef use_border = NULL;
  95
  96    /* use_border = x < 0 || x >= width || y < 0 || y >= height */
  97    if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
  98                                               static_state->min_img_filter,
  99                                               static_state->mag_img_filter)) {
 100       LLVMValueRef b1, b2;
 101       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
 102       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
 103       use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
 104    }
 105
 106    if (dims >= 2 &&
 107        lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
 108                                               static_state->min_img_filter,
 109                                               static_state->mag_img_filter)) {
 110       LLVMValueRef b1, b2;
 111       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
 112       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
 113       if (use_border) {
 114          use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
 115          use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
 116       }
 117       else {
 118          use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
 119       }
 120    }
 121
 122    if (dims == 3 &&
 123        lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
 124                                               static_state->min_img_filter,
 125                                               static_state->mag_img_filter)) {
 126       LLVMValueRef b1, b2;
 127       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
 128       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
 129       if (use_border) {
 130          use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
 131          use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
 132       }
 133       else {
 134          use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
 135       }
 136    }
 137
 138    /* convert x,y,z coords to linear offset from start of texture, in bytes */
 139    lp_build_sample_offset(&bld->int_coord_bld,
 140                           bld->format_desc,
 141                           x, y, z, y_stride, z_stride,
 142                           &offset, &i, &j);
 143    if (mipoffsets) {
 144       offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
 145    }
 146
 147    if (use_border) {
 148       /* If we can sample the border color, it means that texcoords may
 149        * lie outside the bounds of the texture image.  We need to do
 150        * something to prevent reading out of bounds and causing a segfault.
 151        *
 152        * Simply AND the texture coords with !use_border.  This will cause
 153        * coords which are out of bounds to become zero.  Zero's guaranteed
 154        * to be inside the texture image.
 155        */
 156       offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
 157    }
 158
 159    lp_build_fetch_rgba_soa(bld->gallivm,
 160                            bld->format_desc,
 161                            bld->texel_type,
 162                            data_ptr, offset,
 163                            i, j,
 164                            texel_out);
 165
 166    /*
 167     * Note: if we find an app which frequently samples the texture border
 168     * we might want to implement a true conditional here to avoid sampling
 169     * the texture whenever possible (since that's quite a bit of code).
 170     * Ex:
 171     *   if (use_border) {
 172     *      texel = border_color;
 173     *   }
 174     *   else {
 175     *      texel = sample_texture(coord);
 176     *   }
 177     * As it is now, we always sample the texture, then selectively replace
 178     * the texel color results with the border color.
 179     */
 180
 181    if (use_border) {
 182       /* select texel color or border color depending on use_border. */
 183      LLVMValueRef border_color_ptr =
 184          bld->dynamic_state->border_color(bld->dynamic_state,
 185                                           bld->gallivm, sampler_unit);
 186       const struct util_format_description *format_desc;
 187       int chan;
 188       format_desc = util_format_description(bld->static_texture_state->format);
 189       /*
 190        * Only replace channels which are actually present. The others should
 191        * get optimized away eventually by sampler_view swizzle anyway but it's
 192        * easier too as we'd need some extra logic for channels where we can't
 193        * determine the format directly otherwise.
 194        */
 195       for (chan = 0; chan < 4; chan++) {
 196          unsigned chan_s;
 197          /* reverse-map channel... */
 198          for (chan_s = 0; chan_s < 4; chan_s++) {
 199             if (chan_s == format_desc->swizzle[chan]) {
 200                break;
 201             }
 202          }
 203          if (chan_s <= 3) {
 204             LLVMValueRef border_chan =
 205                lp_build_array_get(bld->gallivm, border_color_ptr,
 206                                   lp_build_const_int32(bld->gallivm, chan));
 207             LLVMValueRef border_chan_vec =
 208                lp_build_broadcast_scalar(&bld->float_vec_bld, border_chan);
 209
 210             if (!bld->texel_type.floating) {
 211                border_chan_vec = LLVMBuildBitCast(builder, border_chan_vec,
 212                                                   bld->texel_bld.vec_type, "");
 213             }
 214             else {
 215                /*
 216                 * For normalized format need to clamp border color (technically
 217                 * probably should also quantize the data). Really sucks doing this
 218                 * here but can't avoid at least for now since this is part of
 219                 * sampler state and texture format is part of sampler_view state.
 220                 */
 221                unsigned chan_type = format_desc->channel[chan_s].type;
 222                unsigned chan_norm = format_desc->channel[chan_s].normalized;
 223                if (chan_type == UTIL_FORMAT_TYPE_SIGNED && chan_norm) {
 224                   LLVMValueRef clamp_min;
 225                   clamp_min = lp_build_const_vec(bld->gallivm, bld->texel_type, -1.0F);
 226                   border_chan_vec = lp_build_clamp(&bld->texel_bld, border_chan_vec,
 227                                                    clamp_min,
 228                                                    bld->texel_bld.one);
 229                }
 230                else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED && chan_norm) {
 231                   border_chan_vec = lp_build_clamp(&bld->texel_bld, border_chan_vec,
 232                                                    bld->texel_bld.zero,
 233                                                    bld->texel_bld.one);
 234                }
 235                /* not exactly sure about all others but I think should be ok? */
 236             }
 237             texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
 238                                               border_chan_vec, texel_out[chan]);
 239          }
 240       }
 241    }
 242 }
 243
 244
 245 /**
 246  * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
 247  */
 248 static LLVMValueRef
 249 lp_build_coord_mirror(struct lp_build_sample_context *bld,
 250                       LLVMValueRef coord)
 251 {
 252    struct lp_build_context *coord_bld = &bld->coord_bld;
 253    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 254    LLVMValueRef fract, flr, isOdd;
 255
 256    lp_build_ifloor_fract(coord_bld, coord, &flr, &fract);
 257
 258    /* isOdd = flr & 1 */
 259    isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, int_coord_bld->one, "");
 260
 261    /* make coord positive or negative depending on isOdd */
 262    coord = lp_build_set_sign(coord_bld, fract, isOdd);
 263
 264    /* convert isOdd to float */
 265    isOdd = lp_build_int_to_float(coord_bld, isOdd);
 266
 267    /* add isOdd to coord */
 268    coord = lp_build_add(coord_bld, coord, isOdd);
 269
 270    return coord;
 271 }
 272
 273
 274 /**
 275  * Helper to compute the first coord and the weight for
 276  * linear wrap repeat npot textures
 277  */
 278 void
 279 lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
 280                                   LLVMValueRef coord_f,
 281                                   LLVMValueRef length_i,
 282                                   LLVMValueRef length_f,
 283                                   LLVMValueRef *coord0_i,
 284                                   LLVMValueRef *weight_f)
 285 {
 286    struct lp_build_context *coord_bld = &bld->coord_bld;
 287    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 288    LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
 289    LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
 290                                                 int_coord_bld->one);
 291    LLVMValueRef mask;
 292    /* wrap with normalized floats is just fract */
 293    coord_f = lp_build_fract(coord_bld, coord_f);
 294    /* mul by size and subtract 0.5 */
 295    coord_f = lp_build_mul(coord_bld, coord_f, length_f);
 296    coord_f = lp_build_sub(coord_bld, coord_f, half);
 297    /*
 298     * we avoided the 0.5/length division before the repeat wrap,
 299     * now need to fix up edge cases with selects
 300     */
 301    /* convert to int, compute lerp weight */
 302    lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
 303    mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
 304                            PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
 305    *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
 306 }
 307
 308
 309 /**
 310  * Build LLVM code for texture wrap mode for linear filtering.
 311  * \param x0_out  returns first integer texcoord
 312  * \param x1_out  returns second integer texcoord
 313  * \param weight_out  returns linear interpolation weight
 314  */
 315 static void
 316 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
 317                             LLVMValueRef coord,
 318                             LLVMValueRef length,
 319                             LLVMValueRef length_f,
 320                             LLVMValueRef offset,
 321                             boolean is_pot,
 322                             unsigned wrap_mode,
 323                             LLVMValueRef *x0_out,
 324                             LLVMValueRef *x1_out,
 325                             LLVMValueRef *weight_out)
 326 {
 327    struct lp_build_context *coord_bld = &bld->coord_bld;
 328    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 329    LLVMBuilderRef builder = bld->gallivm->builder;
 330    LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
 331    LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
 332    LLVMValueRef coord0, coord1, weight;
 333
 334    switch(wrap_mode) {
 335    case PIPE_TEX_WRAP_REPEAT:
 336       if (is_pot) {
 337          /* mul by size and subtract 0.5 */
 338          coord = lp_build_mul(coord_bld, coord, length_f);
 339          coord = lp_build_sub(coord_bld, coord, half);
 340          if (offset) {
 341             offset = lp_build_int_to_float(coord_bld, offset);
 342             coord = lp_build_add(coord_bld, coord, offset);
 343          }
 344          /* convert to int, compute lerp weight */
 345          lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
 346          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 347          /* repeat wrap */
 348          coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
 349          coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
 350       }
 351       else {
 352          LLVMValueRef mask;
 353          if (offset) {
 354             offset = lp_build_int_to_float(coord_bld, offset);
 355             offset = lp_build_div(coord_bld, offset, length_f);
 356             coord = lp_build_add(coord_bld, coord, offset);
 357          }
 358          lp_build_coord_repeat_npot_linear(bld, coord,
 359                                            length, length_f,
 360                                            &coord0, &weight);
 361          mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
 362                                  PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
 363          coord1 = LLVMBuildAnd(builder,
 364                                lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
 365                                mask, "");
 366       }
 367       break;
 368
 369    case PIPE_TEX_WRAP_CLAMP:
 370       if (bld->static_sampler_state->normalized_coords) {
 371          /* scale coord to length */
 372          coord = lp_build_mul(coord_bld, coord, length_f);
 373       }
 374       if (offset) {
 375          offset = lp_build_int_to_float(coord_bld, offset);
 376          coord = lp_build_add(coord_bld, coord, offset);
 377       }
 378
 379       /* clamp to [0, length] */
 380       coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);
 381
 382       coord = lp_build_sub(coord_bld, coord, half);
 383
 384       /* convert to int, compute lerp weight */
 385       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
 386       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 387       break;
 388
 389    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 390       {
 391          struct lp_build_context abs_coord_bld = bld->coord_bld;
 392          abs_coord_bld.type.sign = FALSE;
 393
 394          if (bld->static_sampler_state->normalized_coords) {
 395             /* mul by tex size */
 396             coord = lp_build_mul(coord_bld, coord, length_f);
 397          }
 398          if (offset) {
 399             offset = lp_build_int_to_float(coord_bld, offset);
 400             coord = lp_build_add(coord_bld, coord, offset);
 401          }
 402
 403          /* clamp to length max */
 404          coord = lp_build_min(coord_bld, coord, length_f);
 405          /* subtract 0.5 */
 406          coord = lp_build_sub(coord_bld, coord, half);
 407          /* clamp to [0, length - 0.5] */
 408          coord = lp_build_max(coord_bld, coord, coord_bld->zero);
 409          /* convert to int, compute lerp weight */
 410          lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
 411          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 412          /* coord1 = min(coord1, length-1) */
 413          coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
 414          break;
 415       }
 416
 417    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 418       if (bld->static_sampler_state->normalized_coords) {
 419          /* scale coord to length */
 420          coord = lp_build_mul(coord_bld, coord, length_f);
 421       }
 422       if (offset) {
 423          offset = lp_build_int_to_float(coord_bld, offset);
 424          coord = lp_build_add(coord_bld, coord, offset);
 425       }
 426       /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */
 427       /* can skip clamp (though might not work for very large coord values */
 428       coord = lp_build_sub(coord_bld, coord, half);
 429       /* convert to int, compute lerp weight */
 430       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
 431       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 432       break;
 433
 434    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 435       /* compute mirror function */
 436       coord = lp_build_coord_mirror(bld, coord);
 437
 438       /* scale coord to length */
 439       coord = lp_build_mul(coord_bld, coord, length_f);
 440       coord = lp_build_sub(coord_bld, coord, half);
 441       if (offset) {
 442          offset = lp_build_int_to_float(coord_bld, offset);
 443          coord = lp_build_add(coord_bld, coord, offset);
 444       }
 445
 446       /* convert to int, compute lerp weight */
 447       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
 448       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 449
 450       /* coord0 = max(coord0, 0) */
 451       coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
 452       /* coord1 = min(coord1, length-1) */
 453       coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
 454       break;
 455
 456    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 457       if (bld->static_sampler_state->normalized_coords) {
 458          /* scale coord to length */
 459          coord = lp_build_mul(coord_bld, coord, length_f);
 460       }
 461       if (offset) {
 462          offset = lp_build_int_to_float(coord_bld, offset);
 463          coord = lp_build_add(coord_bld, coord, offset);
 464       }
 465       coord = lp_build_abs(coord_bld, coord);
 466
 467       /* clamp to [0, length] */
 468       coord = lp_build_min(coord_bld, coord, length_f);
 469
 470       coord = lp_build_sub(coord_bld, coord, half);
 471
 472       /* convert to int, compute lerp weight */
 473       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
 474       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 475       break;
 476
 477    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 478       {
 479          struct lp_build_context abs_coord_bld = bld->coord_bld;
 480          abs_coord_bld.type.sign = FALSE;
 481
 482          if (bld->static_sampler_state->normalized_coords) {
 483             /* scale coord to length */
 484             coord = lp_build_mul(coord_bld, coord, length_f);
 485          }
 486          if (offset) {
 487             offset = lp_build_int_to_float(coord_bld, offset);
 488             coord = lp_build_add(coord_bld, coord, offset);
 489          }
 490          coord = lp_build_abs(coord_bld, coord);
 491
 492          /* clamp to length max */
 493          coord = lp_build_min(coord_bld, coord, length_f);
 494          /* subtract 0.5 */
 495          coord = lp_build_sub(coord_bld, coord, half);
 496          /* clamp to [0, length - 0.5] */
 497          coord = lp_build_max(coord_bld, coord, coord_bld->zero);
 498
 499          /* convert to int, compute lerp weight */
 500          lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
 501          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 502          /* coord1 = min(coord1, length-1) */
 503          coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
 504       }
 505       break;
 506
 507    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 508       {
 509          if (bld->static_sampler_state->normalized_coords) {
 510             /* scale coord to length */
 511             coord = lp_build_mul(coord_bld, coord, length_f);
 512          }
 513          if (offset) {
 514             offset = lp_build_int_to_float(coord_bld, offset);
 515             coord = lp_build_add(coord_bld, coord, offset);
 516          }
 517          coord = lp_build_abs(coord_bld, coord);
 518
 519          /* was: clamp to [-0.5, length + 0.5] then sub 0.5 */
 520          /* skip clamp - always positive, and other side
 521             only potentially matters for very large coords */
 522          coord = lp_build_sub(coord_bld, coord, half);
 523
 524          /* convert to int, compute lerp weight */
 525          lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
 526          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 527       }
 528       break;
 529
 530    default:
 531       assert(0);
 532       coord0 = NULL;
 533       coord1 = NULL;
 534       weight = NULL;
 535    }
 536
 537    *x0_out = coord0;
 538    *x1_out = coord1;
 539    *weight_out = weight;
 540 }
 541
 542
 543 /**
 544  * Build LLVM code for texture wrap mode for nearest filtering.
 545  * \param coord  the incoming texcoord (nominally in [0,1])
 546  * \param length  the texture size along one dimension, as int vector
 547  * \param length_f  the texture size along one dimension, as float vector
 548  * \param offset  texel offset along one dimension (as int vector)
 549  * \param is_pot  if TRUE, length is a power of two
 550  * \param wrap_mode  one of PIPE_TEX_WRAP_x
 551  */
 552 static LLVMValueRef
 553 lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
 554                              LLVMValueRef coord,
 555                              LLVMValueRef length,
 556                              LLVMValueRef length_f,
 557                              LLVMValueRef offset,
 558                              boolean is_pot,
 559                              unsigned wrap_mode)
 560 {
 561    struct lp_build_context *coord_bld = &bld->coord_bld;
 562    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 563    LLVMBuilderRef builder = bld->gallivm->builder;
 564    LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
 565    LLVMValueRef icoord;
 566
 567    switch(wrap_mode) {
 568    case PIPE_TEX_WRAP_REPEAT:
 569       if (is_pot) {
 570          coord = lp_build_mul(coord_bld, coord, length_f);
 571          icoord = lp_build_ifloor(coord_bld, coord);
 572          if (offset) {
 573             icoord = lp_build_add(int_coord_bld, icoord, offset);
 574          }
 575          icoord = LLVMBuildAnd(builder, icoord, length_minus_one, "");
 576       }
 577       else {
 578           if (offset) {
 579              offset = lp_build_int_to_float(coord_bld, offset);
 580              offset = lp_build_div(coord_bld, offset, length_f);
 581              coord = lp_build_add(coord_bld, coord, offset);
 582           }
 583           /* take fraction, unnormalize */
 584           coord = lp_build_fract_safe(coord_bld, coord);
 585           coord = lp_build_mul(coord_bld, coord, length_f);
 586           icoord = lp_build_itrunc(coord_bld, coord);
 587       }
 588       break;
 589
 590    case PIPE_TEX_WRAP_CLAMP:
 591    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 592       if (bld->static_sampler_state->normalized_coords) {
 593          /* scale coord to length */
 594          coord = lp_build_mul(coord_bld, coord, length_f);
 595       }
 596
 597       /* floor */
 598       /* use itrunc instead since we clamp to 0 anyway */
 599       icoord = lp_build_itrunc(coord_bld, coord);
 600       if (offset) {
 601          icoord = lp_build_add(int_coord_bld, icoord, offset);
 602       }
 603
 604       /* clamp to [0, length - 1]. */
 605       icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
 606                               length_minus_one);
 607       break;
 608
 609    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 610       if (bld->static_sampler_state->normalized_coords) {
 611          /* scale coord to length */
 612          coord = lp_build_mul(coord_bld, coord, length_f);
 613       }
 614       /* no clamp necessary, border masking will handle this */
 615       icoord = lp_build_ifloor(coord_bld, coord);
 616       if (offset) {
 617          icoord = lp_build_add(int_coord_bld, icoord, offset);
 618       }
 619       break;
 620
 621    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 622       if (offset) {
 623          offset = lp_build_int_to_float(coord_bld, offset);
 624          offset = lp_build_div(coord_bld, offset, length_f);
 625          coord = lp_build_add(coord_bld, coord, offset);
 626       }
 627       /* compute mirror function */
 628       coord = lp_build_coord_mirror(bld, coord);
 629
 630       /* scale coord to length */
 631       assert(bld->static_sampler_state->normalized_coords);
 632       coord = lp_build_mul(coord_bld, coord, length_f);
 633
 634       /* itrunc == ifloor here */
 635       icoord = lp_build_itrunc(coord_bld, coord);
 636
 637       /* clamp to [0, length - 1] */
 638       icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
 639       break;
 640
 641    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 642    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 643       if (bld->static_sampler_state->normalized_coords) {
 644          /* scale coord to length */
 645          coord = lp_build_mul(coord_bld, coord, length_f);
 646       }
 647       if (offset) {
 648          offset = lp_build_int_to_float(coord_bld, offset);
 649          coord = lp_build_add(coord_bld, coord, offset);
 650       }
 651       coord = lp_build_abs(coord_bld, coord);
 652
 653       /* itrunc == ifloor here */
 654       icoord = lp_build_itrunc(coord_bld, coord);
 655
 656       /* clamp to [0, length - 1] */
 657       icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
 658       break;
 659
 660    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 661       if (bld->static_sampler_state->normalized_coords) {
 662          /* scale coord to length */
 663          coord = lp_build_mul(coord_bld, coord, length_f);
 664       }
 665       if (offset) {
 666          offset = lp_build_int_to_float(coord_bld, offset);
 667          coord = lp_build_add(coord_bld, coord, offset);
 668       }
 669       coord = lp_build_abs(coord_bld, coord);
 670
 671       /* itrunc == ifloor here */
 672       icoord = lp_build_itrunc(coord_bld, coord);
 673       break;
 674
 675    default:
 676       assert(0);
 677       icoord = NULL;
 678    }
 679
 680    return icoord;
 681 }
 682
 683
 684 /**
 685  * Do shadow test/comparison.
 686  * \param p shadow ref value
 687  * \param texel  the texel to compare against
 688  */
 689 static LLVMValueRef
 690 lp_build_sample_comparefunc(struct lp_build_sample_context *bld,
 691                             LLVMValueRef p,
 692                             LLVMValueRef texel)
 693 {
 694    struct lp_build_context *texel_bld = &bld->texel_bld;
 695    LLVMValueRef res;
 696
 697    if (0) {
 698       //lp_build_print_value(bld->gallivm, "shadow cmp coord", p);
 699       lp_build_print_value(bld->gallivm, "shadow cmp texel", texel);
 700    }
 701
 702    /* result = (p FUNC texel) ? 1 : 0 */
 703    /*
 704     * honor d3d10 floating point rules here, which state that comparisons
 705     * are ordered except NOT_EQUAL which is unordered.
 706     */
 707    if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) {
 708       res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func,
 709                                  p, texel);
 710    }
 711    else {
 712       res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func,
 713                          p, texel);
 714    }
 715    return res;
 716 }
 717
 718
 719 /**
 720  * Generate code to sample a mipmap level with nearest filtering.
 721  * If sampling a cube texture, r = cube face in [0,5].
 722  */
 723 static void
 724 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
 725                               unsigned sampler_unit,
 726                               LLVMValueRef size,
 727                               LLVMValueRef row_stride_vec,
 728                               LLVMValueRef img_stride_vec,
 729                               LLVMValueRef data_ptr,
 730                               LLVMValueRef mipoffsets,
 731                               LLVMValueRef *coords,
 732                               const LLVMValueRef *offsets,
 733                               LLVMValueRef colors_out[4])
 734 {
 735    const unsigned dims = bld->dims;
 736    LLVMValueRef width_vec;
 737    LLVMValueRef height_vec;
 738    LLVMValueRef depth_vec;
 739    LLVMValueRef flt_size;
 740    LLVMValueRef flt_width_vec;
 741    LLVMValueRef flt_height_vec;
 742    LLVMValueRef flt_depth_vec;
 743    LLVMValueRef x, y = NULL, z = NULL;
 744
 745    lp_build_extract_image_sizes(bld,
 746                                 &bld->int_size_bld,
 747                                 bld->int_coord_type,
 748                                 size,
 749                                 &width_vec, &height_vec, &depth_vec);
 750
 751    flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
 752
 753    lp_build_extract_image_sizes(bld,
 754                                 &bld->float_size_bld,
 755                                 bld->coord_type,
 756                                 flt_size,
 757                                 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
 758
 759    /*
 760     * Compute integer texcoords.
 761     */
 762    x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec,
 763                                     flt_width_vec, offsets[0],
 764                                     bld->static_texture_state->pot_width,
 765                                     bld->static_sampler_state->wrap_s);
 766    lp_build_name(x, "tex.x.wrapped");
 767
 768    if (dims >= 2) {
 769       y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec,
 770                                        flt_height_vec, offsets[1],
 771                                        bld->static_texture_state->pot_height,
 772                                        bld->static_sampler_state->wrap_t);
 773       lp_build_name(y, "tex.y.wrapped");
 774
 775       if (dims == 3) {
 776          z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec,
 777                                           flt_depth_vec, offsets[2],
 778                                           bld->static_texture_state->pot_depth,
 779                                           bld->static_sampler_state->wrap_r);
 780          lp_build_name(z, "tex.z.wrapped");
 781       }
 782    }
 783    if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
 784        bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
 785        bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
 786       z = coords[2];
 787       lp_build_name(z, "tex.z.layer");
 788    }
 789
 790    /*
 791     * Get texture colors.
 792     */
 793    lp_build_sample_texel_soa(bld, sampler_unit,
 794                              width_vec, height_vec, depth_vec,
 795                              x, y, z,
 796                              row_stride_vec, img_stride_vec,
 797                              data_ptr, mipoffsets, colors_out);
 798
 799    if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
 800       LLVMValueRef cmpval;
 801       cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]);
 802       /* this is really just a AND 1.0, cmpval but llvm is clever enough */
 803       colors_out[0] = lp_build_select(&bld->texel_bld, cmpval,
 804                                       bld->texel_bld.one, bld->texel_bld.zero);
 805       colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
 806    }
 807
 808 }
 809
 810
 811 /**
 812  * Like a lerp, but inputs are 0/~0 masks, so can simplify slightly.
 813  */
 814 static LLVMValueRef
 815 lp_build_masklerp(struct lp_build_context *bld,
 816                  LLVMValueRef weight,
 817                  LLVMValueRef mask0,
 818                  LLVMValueRef mask1)
 819 {
 820    struct gallivm_state *gallivm = bld->gallivm;
 821    LLVMBuilderRef builder = gallivm->builder;
 822    LLVMValueRef weight2;
 823
 824    weight2 = lp_build_sub(bld, bld->one, weight);
 825    weight = LLVMBuildBitCast(builder, weight,
 826                               lp_build_int_vec_type(gallivm, bld->type), "");
 827    weight2 = LLVMBuildBitCast(builder, weight2,
 828                               lp_build_int_vec_type(gallivm, bld->type), "");
 829    weight = LLVMBuildAnd(builder, weight, mask1, "");
 830    weight2 = LLVMBuildAnd(builder, weight2, mask0, "");
 831    weight = LLVMBuildBitCast(builder, weight, bld->vec_type, "");
 832    weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, "");
 833    return lp_build_add(bld, weight, weight2);
 834 }
 835
 836 /**
 837  * Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly.
 838  */
 839 static LLVMValueRef
 840 lp_build_masklerp2d(struct lp_build_context *bld,
 841                     LLVMValueRef weight0,
 842                     LLVMValueRef weight1,
 843                     LLVMValueRef mask00,
 844                     LLVMValueRef mask01,
 845                     LLVMValueRef mask10,
 846                     LLVMValueRef mask11)
 847 {
 848    LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01);
 849    LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11);
 850    return lp_build_lerp(bld, weight1, val0, val1, 0);
 851 }
 852
 853 /**
 854  * Generate code to sample a mipmap level with linear filtering.
 855  * If sampling a cube texture, r = cube face in [0,5].
 856  */
 857 static void
 858 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
 859                              unsigned sampler_unit,
 860                              LLVMValueRef size,
 861                              LLVMValueRef row_stride_vec,
 862                              LLVMValueRef img_stride_vec,
 863                              LLVMValueRef data_ptr,
 864                              LLVMValueRef mipoffsets,
 865                              LLVMValueRef *coords,
 866                              const LLVMValueRef *offsets,
 867                              LLVMValueRef colors_out[4])
 868 {
 869    const unsigned dims = bld->dims;
 870    LLVMValueRef width_vec;
 871    LLVMValueRef height_vec;
 872    LLVMValueRef depth_vec;
 873    LLVMValueRef flt_size;
 874    LLVMValueRef flt_width_vec;
 875    LLVMValueRef flt_height_vec;
 876    LLVMValueRef flt_depth_vec;
 877    LLVMValueRef x0, y0 = NULL, z0 = NULL, x1, y1 = NULL, z1 = NULL;
 878    LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL;
 879    LLVMValueRef neighbors[2][2][4];
 880    int chan;
 881
 882    lp_build_extract_image_sizes(bld,
 883                                 &bld->int_size_bld,
 884                                 bld->int_coord_type,
 885                                 size,
 886                                 &width_vec, &height_vec, &depth_vec);
 887
 888    flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
 889
 890    lp_build_extract_image_sizes(bld,
 891                                 &bld->float_size_bld,
 892                                 bld->coord_type,
 893                                 flt_size,
 894                                 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
 895
 896    /*
 897     * Compute integer texcoords.
 898     */
 899    lp_build_sample_wrap_linear(bld, coords[0], width_vec,
 900                                flt_width_vec, offsets[0],
 901                                bld->static_texture_state->pot_width,
 902                                bld->static_sampler_state->wrap_s,
 903                                &x0, &x1, &s_fpart);
 904    lp_build_name(x0, "tex.x0.wrapped");
 905    lp_build_name(x1, "tex.x1.wrapped");
 906
 907    if (dims >= 2) {
 908       lp_build_sample_wrap_linear(bld, coords[1], height_vec,
 909                                   flt_height_vec, offsets[1],
 910                                   bld->static_texture_state->pot_height,
 911                                   bld->static_sampler_state->wrap_t,
 912                                   &y0, &y1, &t_fpart);
 913       lp_build_name(y0, "tex.y0.wrapped");
 914       lp_build_name(y1, "tex.y1.wrapped");
 915
 916       if (dims == 3) {
 917          lp_build_sample_wrap_linear(bld, coords[2], depth_vec,
 918                                      flt_depth_vec, offsets[2],
 919                                      bld->static_texture_state->pot_depth,
 920                                      bld->static_sampler_state->wrap_r,
 921                                      &z0, &z1, &r_fpart);
 922          lp_build_name(z0, "tex.z0.wrapped");
 923          lp_build_name(z1, "tex.z1.wrapped");
 924       }
 925    }
 926    if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
 927        bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
 928        bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
 929       z0 = z1 = coords[2];  /* cube face or layer */
 930       lp_build_name(z0, "tex.z0.layer");
 931       lp_build_name(z1, "tex.z1.layer");
 932    }
 933
 934
 935    /*
 936     * Get texture colors.
 937     */
 938    /* get x0/x1 texels */
 939    lp_build_sample_texel_soa(bld, sampler_unit,
 940                              width_vec, height_vec, depth_vec,
 941                              x0, y0, z0,
 942                              row_stride_vec, img_stride_vec,
 943                              data_ptr, mipoffsets, neighbors[0][0]);
 944    lp_build_sample_texel_soa(bld, sampler_unit,
 945                              width_vec, height_vec, depth_vec,
 946                              x1, y0, z0,
 947                              row_stride_vec, img_stride_vec,
 948                              data_ptr, mipoffsets, neighbors[0][1]);
 949
 950    if (dims == 1) {
 951       if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
 952          /* Interpolate two samples from 1D image to produce one color */
 953          for (chan = 0; chan < 4; chan++) {
 954             colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
 955                                              neighbors[0][0][chan],
 956                                              neighbors[0][1][chan],
 957                                              0);
 958          }
 959       }
 960       else {
 961          LLVMValueRef cmpval0, cmpval1;
 962          cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
 963          cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
 964          /* simplified lerp, AND mask with weight and add */
 965          colors_out[0] = lp_build_masklerp(&bld->texel_bld, s_fpart,
 966                                            cmpval0, cmpval1);
 967          colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
 968       }
 969    }
 970    else {
 971       /* 2D/3D texture */
 972       LLVMValueRef colors0[4];
 973
 974       /* get x0/x1 texels at y1 */
 975       lp_build_sample_texel_soa(bld, sampler_unit,
 976                                 width_vec, height_vec, depth_vec,
 977                                 x0, y1, z0,
 978                                 row_stride_vec, img_stride_vec,
 979                                 data_ptr, mipoffsets, neighbors[1][0]);
 980       lp_build_sample_texel_soa(bld, sampler_unit,
 981                                 width_vec, height_vec, depth_vec,
 982                                 x1, y1, z0,
 983                                 row_stride_vec, img_stride_vec,
 984                                 data_ptr, mipoffsets, neighbors[1][1]);
 985
 986       if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
 987          /* Bilinear interpolate the four samples from the 2D image / 3D slice */
 988          for (chan = 0; chan < 4; chan++) {
 989             colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
 990                                              s_fpart, t_fpart,
 991                                              neighbors[0][0][chan],
 992                                              neighbors[0][1][chan],
 993                                              neighbors[1][0][chan],
 994                                              neighbors[1][1][chan],
 995                                              0);
 996          }
 997       }
 998       else {
 999          LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1000          cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1001          cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1002          cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1003          cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1004          colors0[0] = lp_build_masklerp2d(&bld->texel_bld, s_fpart, t_fpart,
1005                                           cmpval00, cmpval01, cmpval10, cmpval11);
1006          colors0[1] = colors0[2] = colors0[3] = colors0[0];
1007       }
1008
1009       if (dims == 3) {
1010          LLVMValueRef neighbors1[2][2][4];
1011          LLVMValueRef colors1[4];
1012
1013          /* get x0/x1/y0/y1 texels at z1 */
1014          lp_build_sample_texel_soa(bld, sampler_unit,
1015                                    width_vec, height_vec, depth_vec,
1016                                    x0, y0, z1,
1017                                    row_stride_vec, img_stride_vec,
1018                                    data_ptr, mipoffsets, neighbors1[0][0]);
1019          lp_build_sample_texel_soa(bld, sampler_unit,
1020                                    width_vec, height_vec, depth_vec,
1021                                    x1, y0, z1,
1022                                    row_stride_vec, img_stride_vec,
1023                                    data_ptr, mipoffsets, neighbors1[0][1]);
1024          lp_build_sample_texel_soa(bld, sampler_unit,
1025                                    width_vec, height_vec, depth_vec,
1026                                    x0, y1, z1,
1027                                    row_stride_vec, img_stride_vec,
1028                                    data_ptr, mipoffsets, neighbors1[1][0]);
1029          lp_build_sample_texel_soa(bld, sampler_unit,
1030                                    width_vec, height_vec, depth_vec,
1031                                    x1, y1, z1,
1032                                    row_stride_vec, img_stride_vec,
1033                                    data_ptr, mipoffsets, neighbors1[1][1]);
1034
1035          if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1036             /* Bilinear interpolate the four samples from the second Z slice */
1037             for (chan = 0; chan < 4; chan++) {
1038                colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
1039                                                 s_fpart, t_fpart,
1040                                                 neighbors1[0][0][chan],
1041                                                 neighbors1[0][1][chan],
1042                                                 neighbors1[1][0][chan],
1043                                                 neighbors1[1][1][chan],
1044                                                 0);
1045             }
1046             /* Linearly interpolate the two samples from the two 3D slices */
1047             for (chan = 0; chan < 4; chan++) {
1048                colors_out[chan] = lp_build_lerp(&bld->texel_bld,
1049                                                 r_fpart,
1050                                                 colors0[chan], colors1[chan],
1051                                                 0);
1052             }
1053          }
1054          else {
1055             LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1056             cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1057             cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1058             cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1059             cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1060             colors1[0] = lp_build_masklerp2d(&bld->texel_bld, s_fpart, t_fpart,
1061                                              cmpval00, cmpval01, cmpval10, cmpval11);
1062             /* Linearly interpolate the two samples from the two 3D slices */
1063             colors_out[0] = lp_build_lerp(&bld->texel_bld,
1064                                              r_fpart,
1065                                              colors0[0], colors1[0],
1066                                              0);
1067             colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1068          }
1069       }
1070       else {
1071          /* 2D tex */
1072          for (chan = 0; chan < 4; chan++) {
1073             colors_out[chan] = colors0[chan];
1074          }
1075       }
1076    }
1077 }
1078
1079
1080 /**
1081  * Sample the texture/mipmap using given image filter and mip filter.
1082  * data0_ptr and data1_ptr point to the two mipmap levels to sample
1083  * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
1084  * If we're using nearest miplevel sampling the '1' values will be null/unused.
1085  */
1086 static void
1087 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1088                        unsigned sampler_unit,
1089                        unsigned img_filter,
1090                        unsigned mip_filter,
1091                        LLVMValueRef *coords,
1092                        const LLVMValueRef *offsets,
1093                        LLVMValueRef ilevel0,
1094                        LLVMValueRef ilevel1,
1095                        LLVMValueRef lod_fpart,
1096                        LLVMValueRef *colors_out)
1097 {
1098    LLVMBuilderRef builder = bld->gallivm->builder;
1099    LLVMValueRef size0 = NULL;
1100    LLVMValueRef size1 = NULL;
1101    LLVMValueRef row_stride0_vec = NULL;
1102    LLVMValueRef row_stride1_vec = NULL;
1103    LLVMValueRef img_stride0_vec = NULL;
1104    LLVMValueRef img_stride1_vec = NULL;
1105    LLVMValueRef data_ptr0 = NULL;
1106    LLVMValueRef data_ptr1 = NULL;
1107    LLVMValueRef mipoff0 = NULL;
1108    LLVMValueRef mipoff1 = NULL;
1109    LLVMValueRef colors0[4], colors1[4];
1110    unsigned chan;
1111
1112    /* sample the first mipmap level */
1113    lp_build_mipmap_level_sizes(bld, ilevel0,
1114                                &size0,
1115                                &row_stride0_vec, &img_stride0_vec);
1116    if (bld->num_lods == 1) {
1117       data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1118    }
1119    else {
1120       /* This path should work for num_lods 1 too but slightly less efficient */
1121       data_ptr0 = bld->base_ptr;
1122       mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1123    }
1124    if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1125       lp_build_sample_image_nearest(bld, sampler_unit,
1126                                     size0,
1127                                     row_stride0_vec, img_stride0_vec,
1128                                     data_ptr0, mipoff0, coords, offsets,
1129                                     colors0);
1130    }
1131    else {
1132       assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1133       lp_build_sample_image_linear(bld, sampler_unit,
1134                                    size0,
1135                                    row_stride0_vec, img_stride0_vec,
1136                                    data_ptr0, mipoff0, coords, offsets,
1137                                    colors0);
1138    }
1139
1140    /* Store the first level's colors in the output variables */
1141    for (chan = 0; chan < 4; chan++) {
1142        LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1143    }
1144
1145    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1146       struct lp_build_if_state if_ctx;
1147       LLVMValueRef need_lerp;
1148
1149       /* need_lerp = lod_fpart > 0 */
1150       if (bld->num_lods == 1) {
1151          need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
1152                                    lod_fpart, bld->levelf_bld.zero,
1153                                    "need_lerp");
1154       }
1155       else {
1156          /*
1157           * We'll do mip filtering if any of the quads (or individual
1158           * pixel in case of per-pixel lod) need it.
1159           * It might be better to split the vectors here and only fetch/filter
1160           * quads which need it.
1161           */
1162          /*
1163           * We unfortunately need to clamp lod_fpart here since we can get
1164           * negative values which would screw up filtering if not all
1165           * lod_fpart values have same sign.
1166           */
1167          lod_fpart = lp_build_max(&bld->levelf_bld, lod_fpart,
1168                                   bld->levelf_bld.zero);
1169          need_lerp = lp_build_compare(bld->gallivm, bld->levelf_bld.type,
1170                                       PIPE_FUNC_GREATER,
1171                                       lod_fpart, bld->levelf_bld.zero);
1172          need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, need_lerp);
1173       }
1174
1175       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1176       {
1177          /* sample the second mipmap level */
1178          lp_build_mipmap_level_sizes(bld, ilevel1,
1179                                      &size1,
1180                                      &row_stride1_vec, &img_stride1_vec);
1181          if (bld->num_lods == 1) {
1182             data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1183          }
1184          else {
1185             data_ptr1 = bld->base_ptr;
1186             mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1187          }
1188          if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1189             lp_build_sample_image_nearest(bld, sampler_unit,
1190                                           size1,
1191                                           row_stride1_vec, img_stride1_vec,
1192                                           data_ptr1, mipoff1, coords, offsets,
1193                                           colors1);
1194          }
1195          else {
1196             lp_build_sample_image_linear(bld, sampler_unit,
1197                                          size1,
1198                                          row_stride1_vec, img_stride1_vec,
1199                                          data_ptr1, mipoff1, coords, offsets,
1200                                          colors1);
1201          }
1202
1203          /* interpolate samples from the two mipmap levels */
1204
1205          if (bld->num_lods != bld->coord_type.length)
1206             lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1207                                                               bld->levelf_bld.type,
1208                                                               bld->texel_bld.type,
1209                                                               lod_fpart);
1210
1211          for (chan = 0; chan < 4; chan++) {
1212             colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1213                                           colors0[chan], colors1[chan],
1214                                           0);
1215             LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1216          }
1217       }
1218       lp_build_endif(&if_ctx);
1219    }
1220 }
1221
1222
1223 /**
1224  * Build (per-coord) layer value.
1225  * Either clamp layer to valid values or fill in optional out_of_bounds
1226  * value and just return value unclamped.
1227  */
1228 static LLVMValueRef
1229 lp_build_layer_coord(struct lp_build_sample_context *bld,
1230                      unsigned texture_unit,
1231                      LLVMValueRef layer,
1232                      LLVMValueRef *out_of_bounds)
1233 {
1234    LLVMValueRef num_layers;
1235    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
1236
1237    num_layers = bld->dynamic_state->depth(bld->dynamic_state,
1238                                           bld->gallivm, texture_unit);
1239
1240    if (out_of_bounds) {
1241       LLVMValueRef out1, out;
1242       num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers);
1243       out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero);
1244       out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers);
1245       *out_of_bounds = lp_build_or(int_coord_bld, out, out1);
1246       return layer;
1247    }
1248    else {
1249       LLVMValueRef maxlayer;
1250       maxlayer = lp_build_sub(&bld->int_bld, num_layers, bld->int_bld.one);
1251       maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer);
1252       return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer);
1253    }
1254 }
1255
1256
1257 /**
1258  * Calculate cube face, lod, mip levels.
1259  */
1260 static void
1261 lp_build_sample_common(struct lp_build_sample_context *bld,
1262                        unsigned texture_index,
1263                        unsigned sampler_index,
1264                        LLVMValueRef *coords,
1265                        const struct lp_derivatives *derivs, /* optional */
1266                        LLVMValueRef lod_bias, /* optional */
1267                        LLVMValueRef explicit_lod, /* optional */
1268                        LLVMValueRef *lod_ipart,
1269                        LLVMValueRef *lod_fpart,
1270                        LLVMValueRef *ilevel0,
1271                        LLVMValueRef *ilevel1)
1272 {
1273    const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
1274    const unsigned min_filter = bld->static_sampler_state->min_img_filter;
1275    const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
1276    const unsigned target = bld->static_texture_state->target;
1277    LLVMValueRef first_level, cube_rho = NULL;
1278
1279    /*
1280    printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
1281           mip_filter, min_filter, mag_filter);
1282    */
1283
1284    /*
1285     * Choose cube face, recompute texcoords for the chosen face and
1286     * compute rho here too (as it requires transform of derivatives).
1287     */
1288    if (target == PIPE_TEXTURE_CUBE) {
1289       boolean need_derivs;
1290       need_derivs = ((min_filter != mag_filter ||
1291                       mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
1292                       !bld->static_sampler_state->min_max_lod_equal &&
1293                       !explicit_lod);
1294       lp_build_cube_lookup(bld, coords, derivs, &cube_rho, need_derivs);
1295    }
1296    else if (target == PIPE_TEXTURE_1D_ARRAY ||
1297             target == PIPE_TEXTURE_2D_ARRAY) {
1298       coords[2] = lp_build_iround(&bld->coord_bld, coords[2]);
1299       coords[2] = lp_build_layer_coord(bld, texture_index, coords[2], NULL);
1300    }
1301
1302    if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
1303       /*
1304        * Clamp p coords to [0,1] for fixed function depth texture format here.
1305        * Technically this is not entirely correct for unorm depth as the ref value
1306        * should be converted to the depth format (quantization!) and comparison
1307        * then done in texture format. This would actually help performance (since
1308        * only need to do it once and could save the per-sample conversion of texels
1309        * to floats instead), but it would need more messy code (would need to push
1310        * at least some bits down to actual fetch so conversion could be skipped,
1311        * and would have ugly interaction with border color, would need to convert
1312        * border color to that format too or do some other tricks to make it work).
1313        */
1314       const struct util_format_description *format_desc;
1315       unsigned chan_type;
1316       format_desc = util_format_description(bld->static_texture_state->format);
1317       /* not entirely sure we couldn't end up with non-valid swizzle here */
1318       chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
1319                      format_desc->channel[format_desc->swizzle[0]].type :
1320                      UTIL_FORMAT_TYPE_FLOAT;
1321       if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
1322          coords[4] = lp_build_clamp(&bld->coord_bld, coords[4],
1323                                     bld->coord_bld.zero, bld->coord_bld.one);
1324       }
1325    }
1326
1327    /*
1328     * Compute the level of detail (float).
1329     */
1330    if (min_filter != mag_filter ||
1331        mip_filter != PIPE_TEX_MIPFILTER_NONE) {
1332       /* Need to compute lod either to choose mipmap levels or to
1333        * distinguish between minification/magnification with one mipmap level.
1334        */
1335       lp_build_lod_selector(bld, texture_index, sampler_index,
1336                             coords[0], coords[1], coords[2], cube_rho,
1337                             derivs, lod_bias, explicit_lod,
1338                             mip_filter,
1339                             lod_ipart, lod_fpart);
1340    } else {
1341       *lod_ipart = bld->leveli_bld.zero;
1342    }
1343
1344    /*
1345     * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
1346     */
1347    switch (mip_filter) {
1348    default:
1349       assert(0 && "bad mip_filter value in lp_build_sample_soa()");
1350       /* fall-through */
1351    case PIPE_TEX_MIPFILTER_NONE:
1352       /* always use mip level 0 */
1353       if (HAVE_LLVM == 0x0207 && target == PIPE_TEXTURE_CUBE) {
1354          /* XXX this is a work-around for an apparent bug in LLVM 2.7.
1355           * We should be able to set ilevel0 = const(0) but that causes
1356           * bad x86 code to be emitted.
1357           */
1358          assert(*lod_ipart);
1359          lp_build_nearest_mip_level(bld, texture_index, *lod_ipart, ilevel0, NULL);
1360       }
1361       else {
1362          first_level = bld->dynamic_state->first_level(bld->dynamic_state,
1363                                                        bld->gallivm, texture_index);
1364          first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
1365          *ilevel0 = first_level;
1366       }
1367       break;
1368    case PIPE_TEX_MIPFILTER_NEAREST:
1369       assert(*lod_ipart);
1370       lp_build_nearest_mip_level(bld, texture_index, *lod_ipart, ilevel0, NULL);
1371       break;
1372    case PIPE_TEX_MIPFILTER_LINEAR:
1373       assert(*lod_ipart);
1374       assert(*lod_fpart);
1375       lp_build_linear_mip_levels(bld, texture_index,
1376                                  *lod_ipart, lod_fpart,
1377                                  ilevel0, ilevel1);
1378       break;
1379    }
1380 }
1381
1382 /**
1383  * General texture sampling codegen.
1384  * This function handles texture sampling for all texture targets (1D,
1385  * 2D, 3D, cube) and all filtering modes.
1386  */
1387 static void
1388 lp_build_sample_general(struct lp_build_sample_context *bld,
1389                         unsigned sampler_unit,
1390                         LLVMValueRef *coords,
1391                         const LLVMValueRef *offsets,
1392                         LLVMValueRef lod_ipart,
1393                         LLVMValueRef lod_fpart,
1394                         LLVMValueRef ilevel0,
1395                         LLVMValueRef ilevel1,
1396                         LLVMValueRef *colors_out)
1397 {
1398    struct lp_build_context *int_bld = &bld->int_bld;
1399    LLVMBuilderRef builder = bld->gallivm->builder;
1400    const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
1401    const unsigned min_filter = bld->static_sampler_state->min_img_filter;
1402    const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
1403    LLVMValueRef texels[4];
1404    unsigned chan;
1405
1406    /*
1407     * Get/interpolate texture colors.
1408     */
1409
1410    for (chan = 0; chan < 4; ++chan) {
1411      texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, "");
1412      lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]);
1413    }
1414
1415    if (min_filter == mag_filter) {
1416       /* no need to distinguish between minification and magnification */
1417       lp_build_sample_mipmap(bld, sampler_unit,
1418                              min_filter, mip_filter,
1419                              coords, offsets,
1420                              ilevel0, ilevel1, lod_fpart,
1421                              texels);
1422    }
1423    else {
1424       /* Emit conditional to choose min image filter or mag image filter
1425        * depending on the lod being > 0 or <= 0, respectively.
1426        */
1427       struct lp_build_if_state if_ctx;
1428       LLVMValueRef minify;
1429
1430       /*
1431        * XXX this should to all lods into account, if some are min
1432        * some max probably could hack up the coords/weights in the linear
1433        * path with selects to work for nearest.
1434        * If that's just two quads sitting next to each other it seems
1435        * quite ok to do the same filtering method on both though, at
1436        * least unless we have explicit lod (and who uses different
1437        * min/mag filter with that?)
1438        */
1439       if (bld->num_lods > 1)
1440          lod_ipart = LLVMBuildExtractElement(builder, lod_ipart,
1441                                              lp_build_const_int32(bld->gallivm, 0), "");
1442
1443       /* minify = lod >= 0.0 */
1444       minify = LLVMBuildICmp(builder, LLVMIntSGE,
1445                              lod_ipart, int_bld->zero, "");
1446
1447       lp_build_if(&if_ctx, bld->gallivm, minify);
1448       {
1449          /* Use the minification filter */
1450          lp_build_sample_mipmap(bld, sampler_unit,
1451                                 min_filter, mip_filter,
1452                                 coords, offsets,
1453                                 ilevel0, ilevel1, lod_fpart,
1454                                 texels);
1455       }
1456       lp_build_else(&if_ctx);
1457       {
1458          /* Use the magnification filter */
1459          lp_build_sample_mipmap(bld, sampler_unit,
1460                                 mag_filter, PIPE_TEX_MIPFILTER_NONE,
1461                                 coords, offsets,
1462                                 ilevel0, NULL, NULL,
1463                                 texels);
1464       }
1465       lp_build_endif(&if_ctx);
1466    }
1467
1468    for (chan = 0; chan < 4; ++chan) {
1469      colors_out[chan] = LLVMBuildLoad(builder, texels[chan], "");
1470      lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]);
1471    }
1472 }
1473
1474
1475 /**
1476  * Texel fetch function.
1477  * In contrast to general sampling there is no filtering, no coord minification,
1478  * lod (if any) is always explicit uint, coords are uints (in terms of texel units)
1479  * directly to be applied to the selected mip level (after adding texel offsets).
1480  * This function handles texel fetch for all targets where texel fetch is supported
1481  * (no cube maps, but 1d, 2d, 3d are supported, arrays and buffers should be too).
1482  */
1483 static void
1484 lp_build_fetch_texel(struct lp_build_sample_context *bld,
1485                      unsigned texture_unit,
1486                      const LLVMValueRef *coords,
1487                      LLVMValueRef explicit_lod,
1488                      const LLVMValueRef *offsets,
1489                      LLVMValueRef *colors_out)
1490 {
1491    struct lp_build_context *perquadi_bld = &bld->leveli_bld;
1492    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
1493    unsigned dims = bld->dims, chan;
1494    unsigned target = bld->static_texture_state->target;
1495    boolean out_of_bound_ret_zero = TRUE;
1496    LLVMValueRef size, ilevel;
1497    LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
1498    LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
1499    LLVMValueRef width, height, depth, i, j;
1500    LLVMValueRef offset, out_of_bounds, out1;
1501
1502    out_of_bounds = int_coord_bld->zero;
1503
1504    if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
1505       if (bld->num_lods != int_coord_bld->type.length) {
1506          ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
1507                                             perquadi_bld->type, explicit_lod, 0);
1508       }
1509       else {
1510          ilevel = explicit_lod;
1511       }
1512       lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel,
1513                                  out_of_bound_ret_zero ? &out_of_bounds : NULL);
1514    }
1515    else {
1516       assert(bld->num_lods == 1);
1517       if (bld->static_texture_state->target != PIPE_BUFFER) {
1518          ilevel = bld->dynamic_state->first_level(bld->dynamic_state,
1519                                                   bld->gallivm, texture_unit);
1520       }
1521       else {
1522          ilevel = lp_build_const_int32(bld->gallivm, 0);
1523       }
1524    }
1525    lp_build_mipmap_level_sizes(bld, ilevel,
1526                                &size,
1527                                &row_stride_vec, &img_stride_vec);
1528    lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
1529                                 size, &width, &height, &depth);
1530
1531    if (target == PIPE_TEXTURE_1D_ARRAY ||
1532        target == PIPE_TEXTURE_2D_ARRAY) {
1533       if (out_of_bound_ret_zero) {
1534          z = lp_build_layer_coord(bld, texture_unit, z, &out1);
1535          out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1536       }
1537       else {
1538          z = lp_build_layer_coord(bld, texture_unit, z, NULL);
1539       }
1540    }
1541
1542    /* This is a lot like border sampling */
1543    if (offsets[0]) {
1544       /*
1545        * coords are really unsigned, offsets are signed, but I don't think
1546        * exceeding 31 bits is possible
1547        */
1548       x = lp_build_add(int_coord_bld, x, offsets[0]);
1549    }
1550    out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
1551    out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1552    out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
1553    out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1554
1555    if (dims >= 2) {
1556       if (offsets[1]) {
1557          y = lp_build_add(int_coord_bld, y, offsets[1]);
1558       }
1559       out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
1560       out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1561       out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
1562       out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1563
1564       if (dims >= 3) {
1565          if (offsets[2]) {
1566             z = lp_build_add(int_coord_bld, z, offsets[2]);
1567          }
1568          out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
1569          out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1570          out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
1571          out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1572       }
1573    }
1574
1575    lp_build_sample_offset(int_coord_bld,
1576                           bld->format_desc,
1577                           x, y, z, row_stride_vec, img_stride_vec,
1578                           &offset, &i, &j);
1579
1580    if (bld->static_texture_state->target != PIPE_BUFFER) {
1581       offset = lp_build_add(int_coord_bld, offset,
1582                             lp_build_get_mip_offsets(bld, ilevel));
1583    }
1584
1585    offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);
1586
1587    lp_build_fetch_rgba_soa(bld->gallivm,
1588                            bld->format_desc,
1589                            bld->texel_type,
1590                            bld->base_ptr, offset,
1591                            i, j,
1592                            colors_out);
1593
1594    if (out_of_bound_ret_zero) {
1595       /*
1596        * Only needed for ARB_robust_buffer_access_behavior and d3d10.
1597        * Could use min/max above instead of out-of-bounds comparisons
1598        * if we don't care about the result returned for out-of-bounds.
1599        */
1600       for (chan = 0; chan < 4; chan++) {
1601          colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
1602                                             bld->texel_bld.zero, colors_out[chan]);
1603       }
1604    }
1605 }
1606
1607
1608 /**
1609  * Just set texels to white instead of actually sampling the texture.
1610  * For debugging.
1611  */
1612 void
1613 lp_build_sample_nop(struct gallivm_state *gallivm,
1614                     struct lp_type type,
1615                     const LLVMValueRef *coords,
1616                     LLVMValueRef texel_out[4])
1617 {
1618    LLVMValueRef one = lp_build_one(gallivm, type);
1619    unsigned chan;
1620
1621    for (chan = 0; chan < 4; chan++) {
1622       texel_out[chan] = one;
1623    }
1624 }
1625
1626
1627 /**
1628  * Build texture sampling code.
1629  * 'texel' will return a vector of four LLVMValueRefs corresponding to
1630  * R, G, B, A.
1631  * \param type  vector float type to use for coords, etc.
1632  * \param is_fetch  if this is a texel fetch instruction.
1633  * \param derivs  partial derivatives of (s,t,r,q) with respect to x and y
1634  */
1635 void
1636 lp_build_sample_soa(struct gallivm_state *gallivm,
1637                     const struct lp_static_texture_state *static_texture_state,
1638                     const struct lp_static_sampler_state *static_sampler_state,
1639                     struct lp_sampler_dynamic_state *dynamic_state,
1640                     struct lp_type type,
1641                     boolean is_fetch,
1642                     unsigned texture_index,
1643                     unsigned sampler_index,
1644                     const LLVMValueRef *coords,
1645                     const LLVMValueRef *offsets,
1646                     const struct lp_derivatives *derivs, /* optional */
1647                     LLVMValueRef lod_bias, /* optional */
1648                     LLVMValueRef explicit_lod, /* optional */
1649                     enum lp_sampler_lod_property lod_property,
1650                     LLVMValueRef texel_out[4])
1651 {
1652    unsigned target = static_texture_state->target;
1653    unsigned dims = texture_dims(target);
1654    unsigned num_quads = type.length / 4;
1655    unsigned mip_filter, i;
1656    struct lp_build_sample_context bld;
1657    struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
1658    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
1659    LLVMBuilderRef builder = gallivm->builder;
1660    LLVMValueRef tex_width, newcoords[5];
1661
1662    if (0) {
1663       enum pipe_format fmt = static_texture_state->format;
1664       debug_printf("Sample from %s\n", util_format_name(fmt));
1665    }
1666
1667    assert(type.floating);
1668
1669    /* Setup our build context */
1670    memset(&bld, 0, sizeof bld);
1671    bld.gallivm = gallivm;
1672    bld.static_sampler_state = &derived_sampler_state;
1673    bld.static_texture_state = static_texture_state;
1674    bld.dynamic_state = dynamic_state;
1675    bld.format_desc = util_format_description(static_texture_state->format);
1676    bld.dims = dims;
1677
1678    bld.vector_width = lp_type_width(type);
1679
1680    bld.float_type = lp_type_float(32);
1681    bld.int_type = lp_type_int(32);
1682    bld.coord_type = type;
1683    bld.int_coord_type = lp_int_type(type);
1684    bld.float_size_in_type = lp_type_float(32);
1685    bld.float_size_in_type.length = dims > 1 ? 4 : 1;
1686    bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
1687    bld.texel_type = type;
1688
1689    /* always using the first channel hopefully should be safe,
1690     * if not things WILL break in other places anyway.
1691     */
1692    if (bld.format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
1693        bld.format_desc->channel[0].pure_integer) {
1694       if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
1695          bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
1696       }
1697       else if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
1698          bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length);
1699       }
1700    }
1701    else if (util_format_has_stencil(bld.format_desc) &&
1702        !util_format_has_depth(bld.format_desc)) {
1703       /* for stencil only formats, sample stencil (uint) */
1704       bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
1705    }
1706
1707    if (!static_texture_state->level_zero_only) {
1708       derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
1709    } else {
1710       derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
1711    }
1712    mip_filter = derived_sampler_state.min_mip_filter;
1713
1714    if (0) {
1715       debug_printf("  .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter);
1716    }
1717
1718    /*
1719     * This is all a bit complicated different paths are chosen for performance
1720     * reasons.
1721     * Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for
1722     * everything (the last two options are equivalent for 4-wide case).
1723     * If there's per-quad lod but we split to 4-wide so we can use AoS, per-quad
1724     * lod is calculated then the lod value extracted afterwards so making this
1725     * case basically the same as far as lod handling is concerned for the
1726     * further sample/filter code as the 1 lod for everything case.
1727     * Different lod handling mostly shows up when building mipmap sizes
1728     * (lp_build_mipmap_level_sizes() and friends) and also in filtering
1729     * (getting the fractional part of the lod to the right texels).
1730     */
1731
1732    /*
1733     * There are other situations where at least the multiple int lods could be
1734     * avoided like min and max lod being equal.
1735     */
1736    if (explicit_lod && lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
1737        ((is_fetch && target != PIPE_BUFFER) ||
1738         (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
1739       bld.num_lods = type.length;
1740    /* TODO: for true scalar_lod should only use 1 lod value */
1741    else if ((is_fetch && explicit_lod && target != PIPE_BUFFER ) ||
1742             (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
1743       bld.num_lods = num_quads;
1744    }
1745    else {
1746       bld.num_lods = 1;
1747    }
1748
1749    bld.levelf_type = type;
1750    /* we want native vector size to be able to use our intrinsics */
1751    if (bld.num_lods != type.length) {
1752       bld.levelf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
1753    }
1754    bld.leveli_type = lp_int_type(bld.levelf_type);
1755    bld.float_size_type = bld.float_size_in_type;
1756    /* Note: size vectors may not be native. They contain minified w/h/d/_ values,
1757     * with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to 8x4f32 */
1758    if (bld.num_lods > 1) {
1759       bld.float_size_type.length = bld.num_lods == type.length ?
1760                                       bld.num_lods * bld.float_size_in_type.length :
1761                                       type.length;
1762    }
1763    bld.int_size_type = lp_int_type(bld.float_size_type);
1764
1765    lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
1766    lp_build_context_init(&bld.float_vec_bld, gallivm, type);
1767    lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
1768    lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
1769    lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
1770    lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
1771    lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
1772    lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
1773    lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
1774    lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
1775    lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
1776    lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
1777
1778    /* Get the dynamic state */
1779    tex_width = dynamic_state->width(dynamic_state, gallivm, texture_index);
1780    bld.row_stride_array = dynamic_state->row_stride(dynamic_state, gallivm, texture_index);
1781    bld.img_stride_array = dynamic_state->img_stride(dynamic_state, gallivm, texture_index);
1782    bld.base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm, texture_index);
1783    bld.mip_offsets = dynamic_state->mip_offsets(dynamic_state, gallivm, texture_index);
1784    /* Note that mip_offsets is an array[level] of offsets to texture images */
1785
1786    /* width, height, depth as single int vector */
1787    if (dims <= 1) {
1788       bld.int_size = tex_width;
1789    }
1790    else {
1791       bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
1792                                             tex_width, LLVMConstInt(i32t, 0, 0), "");
1793       if (dims >= 2) {
1794          LLVMValueRef tex_height =
1795             dynamic_state->height(dynamic_state, gallivm, texture_index);
1796          bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
1797                                                tex_height, LLVMConstInt(i32t, 1, 0), "");
1798          if (dims >= 3) {
1799             LLVMValueRef tex_depth =
1800                dynamic_state->depth(dynamic_state, gallivm, texture_index);
1801             bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
1802                                                   tex_depth, LLVMConstInt(i32t, 2, 0), "");
1803          }
1804       }
1805    }
1806
1807    for (i = 0; i < 5; i++) {
1808       newcoords[i] = coords[i];
1809    }
1810
1811    if (0) {
1812       /* For debug: no-op texture sampling */
1813       lp_build_sample_nop(gallivm,
1814                           bld.texel_type,
1815                           newcoords,
1816                           texel_out);
1817    }
1818
1819    else if (is_fetch) {
1820       lp_build_fetch_texel(&bld, texture_index, newcoords,
1821                            explicit_lod, offsets,
1822                            texel_out);
1823    }
1824
1825    else {
1826       LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
1827       LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
1828       boolean use_aos = util_format_fits_8unorm(bld.format_desc) &&
1829                         lp_is_simple_wrap_mode(static_sampler_state->wrap_s) &&
1830                         lp_is_simple_wrap_mode(static_sampler_state->wrap_t) &&
1831                         /* not sure this is strictly needed or simply impossible */
1832                         static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE;
1833
1834       if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
1835           !use_aos && util_format_fits_8unorm(bld.format_desc)) {
1836          debug_printf("%s: using floating point linear filtering for %s\n",
1837                       __FUNCTION__, bld.format_desc->short_name);
1838          debug_printf("  min_img %d  mag_img %d  mip %d  wraps %d  wrapt %d\n",
1839                       static_sampler_state->min_img_filter,
1840                       static_sampler_state->mag_img_filter,
1841                       static_sampler_state->min_mip_filter,
1842                       static_sampler_state->wrap_s,
1843                       static_sampler_state->wrap_t);
1844       }
1845
1846       lp_build_sample_common(&bld, texture_index, sampler_index,
1847                              newcoords,
1848                              derivs, lod_bias, explicit_lod,
1849                              &lod_ipart, &lod_fpart,
1850                              &ilevel0, &ilevel1);
1851
1852       /*
1853        * we only try 8-wide sampling with soa as it appears to
1854        * be a loss with aos with AVX (but it should work).
1855        * (It should be faster if we'd support avx2)
1856        */
1857       if (num_quads == 1 || !use_aos) {
1858
1859          if (num_quads > 1) {
1860             if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
1861                LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
1862                /*
1863                 * These parameters are the same for all quads,
1864                 * could probably simplify.
1865                 */
1866                lod_ipart = LLVMBuildExtractElement(builder, lod_ipart, index0, "");
1867                ilevel0 = LLVMBuildExtractElement(builder, ilevel0, index0, "");
1868             }
1869          }
1870          if (use_aos) {
1871             /* do sampling/filtering with fixed pt arithmetic */
1872             lp_build_sample_aos(&bld, sampler_index,
1873                                 newcoords[0], newcoords[1],
1874                                 newcoords[2],
1875                                 offsets, lod_ipart, lod_fpart,
1876                                 ilevel0, ilevel1,
1877                                 texel_out);
1878          }
1879
1880          else {
1881             lp_build_sample_general(&bld, sampler_index,
1882                                     newcoords, offsets,
1883                                     lod_ipart, lod_fpart,
1884                                     ilevel0, ilevel1,
1885                                     texel_out);
1886          }
1887       }
1888       else {
1889          unsigned j;
1890          struct lp_build_sample_context bld4;
1891          struct lp_type type4 = type;
1892          unsigned i;
1893          LLVMValueRef texelout4[4];
1894          LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
1895
1896          type4.length = 4;
1897
1898          /* Setup our build context */
1899          memset(&bld4, 0, sizeof bld4);
1900          bld4.gallivm = bld.gallivm;
1901          bld4.static_texture_state = bld.static_texture_state;
1902          bld4.static_sampler_state = bld.static_sampler_state;
1903          bld4.dynamic_state = bld.dynamic_state;
1904          bld4.format_desc = bld.format_desc;
1905          bld4.dims = bld.dims;
1906          bld4.row_stride_array = bld.row_stride_array;
1907          bld4.img_stride_array = bld.img_stride_array;
1908          bld4.base_ptr = bld.base_ptr;
1909          bld4.mip_offsets = bld.mip_offsets;
1910          bld4.int_size = bld.int_size;
1911
1912          bld4.vector_width = lp_type_width(type4);
1913
1914          bld4.float_type = lp_type_float(32);
1915          bld4.int_type = lp_type_int(32);
1916          bld4.coord_type = type4;
1917          bld4.int_coord_type = lp_int_type(type4);
1918          bld4.float_size_in_type = lp_type_float(32);
1919          bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
1920          bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
1921          bld4.texel_type = bld.texel_type;
1922          bld4.texel_type.length = 4;
1923          bld4.levelf_type = type4;
1924          /* we want native vector size to be able to use our intrinsics */
1925          bld4.levelf_type.length = 1;
1926          bld4.leveli_type = lp_int_type(bld4.levelf_type);
1927
1928          if (explicit_lod && lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
1929              ((is_fetch && target != PIPE_BUFFER) ||
1930               (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
1931             bld4.num_lods = type4.length;
1932          else
1933             bld4.num_lods = 1;
1934
1935          bld4.levelf_type = type4;
1936          /* we want native vector size to be able to use our intrinsics */
1937          if (bld4.num_lods != type4.length) {
1938             bld4.levelf_type.length = 1;
1939          }
1940          bld4.leveli_type = lp_int_type(bld4.levelf_type);
1941          bld4.float_size_type = bld4.float_size_in_type;
1942          if (bld4.num_lods > 1) {
1943             bld4.float_size_type.length = bld4.num_lods == type4.length ?
1944                                             bld4.num_lods * bld4.float_size_in_type.length :
1945                                             type4.length;
1946          }
1947          bld4.int_size_type = lp_int_type(bld4.float_size_type);
1948
1949          lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
1950          lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
1951          lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
1952          lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
1953          lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
1954          lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
1955          lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
1956          lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
1957          lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
1958          lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
1959          lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
1960          lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
1961
1962          for (i = 0; i < num_quads; i++) {
1963             LLVMValueRef s4, t4, r4;
1964             LLVMValueRef lod_ipart4, lod_fpart4 = NULL;
1965             LLVMValueRef ilevel04, ilevel14 = NULL;
1966             LLVMValueRef offsets4[4] = { NULL };
1967             unsigned num_lods = bld4.num_lods;
1968
1969             s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4);
1970             t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4);
1971             r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4);
1972
1973             if (offsets[0]) {
1974                offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4);
1975                if (dims > 1) {
1976                   offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4);
1977                   if (dims > 2) {
1978                      offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4);
1979                   }
1980                }
1981             }
1982             lod_ipart4 = lp_build_extract_range(gallivm, lod_ipart, num_lods * i, num_lods);
1983             ilevel04 = lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
1984             if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1985                ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
1986                lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
1987             }
1988
1989             if (use_aos) {
1990                /* do sampling/filtering with fixed pt arithmetic */
1991                lp_build_sample_aos(&bld4, sampler_index,
1992                                    s4, t4, r4, offsets4,
1993                                    lod_ipart4, lod_fpart4,
1994                                    ilevel04, ilevel14,
1995                                    texelout4);
1996             }
1997
1998             else {
1999                /* this path is currently unreachable and hence might break easily... */
2000                LLVMValueRef newcoords4[5];
2001                newcoords4[0] = s4;
2002                newcoords4[1] = t4;
2003                newcoords4[2] = r4;
2004                newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4);
2005                newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4);
2006
2007                lp_build_sample_general(&bld4, sampler_index,
2008                                        newcoords4, offsets4,
2009                                        lod_ipart4, lod_fpart4,
2010                                        ilevel04, ilevel14,
2011                                        texelout4);
2012             }
2013             for (j = 0; j < 4; j++) {
2014                texelouttmp[j][i] = texelout4[j];
2015             }
2016          }
2017
2018          for (j = 0; j < 4; j++) {
2019             texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
2020          }
2021       }
2022    }
2023
2024    if (target != PIPE_BUFFER) {
2025       apply_sampler_swizzle(&bld, texel_out);
2026    }
2027
2028    /*
2029     * texel type can be a (32bit) int/uint (for pure int formats only),
2030     * however we are expected to always return floats (storage is untyped).
2031     */
2032    if (!bld.texel_type.floating) {
2033       unsigned chan;
2034       for (chan = 0; chan < 4; chan++) {
2035          texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan],
2036                                             lp_build_vec_type(gallivm, type), "");
2037       }
2038    }
2039 }
2040
2041 void
2042 lp_build_size_query_soa(struct gallivm_state *gallivm,
2043                         const struct lp_static_texture_state *static_state,
2044                         struct lp_sampler_dynamic_state *dynamic_state,
2045                         struct lp_type int_type,
2046                         unsigned texture_unit,
2047                         unsigned target,
2048                         boolean is_sviewinfo,
2049                         enum lp_sampler_lod_property lod_property,
2050                         LLVMValueRef explicit_lod,
2051                         LLVMValueRef *sizes_out)
2052 {
2053    LLVMValueRef lod, level, size;
2054    LLVMValueRef first_level = NULL;
2055    int dims, i;
2056    boolean has_array;
2057    unsigned num_lods = 1;
2058    struct lp_build_context bld_int_vec4;
2059
2060    /*
2061     * Do some sanity verification about bound texture and shader dcl target.
2062     * Not entirely sure what's possible but assume array/non-array
2063     * always compatible (probably not ok for OpenGL but d3d10 has no
2064     * distinction of arrays at the resource level).
2065     * Everything else looks bogus (though not entirely sure about rect/2d).
2066     * Currently disabled because it causes assertion failures if there's
2067     * nothing bound (or rather a dummy texture, not that this case would
2068     * return the right values).
2069     */
2070    if (0 && static_state->target != target) {
2071       if (static_state->target == PIPE_TEXTURE_1D)
2072          assert(target == PIPE_TEXTURE_1D_ARRAY);
2073       else if (static_state->target == PIPE_TEXTURE_1D_ARRAY)
2074          assert(target == PIPE_TEXTURE_1D);
2075       else if (static_state->target == PIPE_TEXTURE_2D)
2076          assert(target == PIPE_TEXTURE_2D_ARRAY);
2077       else if (static_state->target == PIPE_TEXTURE_2D_ARRAY)
2078          assert(target == PIPE_TEXTURE_2D);
2079       else if (static_state->target == PIPE_TEXTURE_CUBE)
2080          assert(target == PIPE_TEXTURE_CUBE_ARRAY);
2081       else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY)
2082          assert(target == PIPE_TEXTURE_CUBE);
2083       else
2084          assert(0);
2085    }
2086
2087    dims = texture_dims(target);
2088
2089    switch (target) {
2090    case PIPE_TEXTURE_1D_ARRAY:
2091    case PIPE_TEXTURE_2D_ARRAY:
2092       has_array = TRUE;
2093       break;
2094    default:
2095       has_array = FALSE;
2096       break;
2097    }
2098
2099    assert(!int_type.floating);
2100
2101    lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));
2102
2103    if (explicit_lod) {
2104       /* FIXME: this needs to honor per-element lod */
2105       lod = LLVMBuildExtractElement(gallivm->builder, explicit_lod, lp_build_const_int32(gallivm, 0), "");
2106       first_level = dynamic_state->first_level(dynamic_state, gallivm, texture_unit);
2107       level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level");
2108       lod = lp_build_broadcast_scalar(&bld_int_vec4, level);
2109    } else {
2110       lod = bld_int_vec4.zero;
2111    }
2112
2113    size = bld_int_vec4.undef;
2114
2115    size = LLVMBuildInsertElement(gallivm->builder, size,
2116                                  dynamic_state->width(dynamic_state, gallivm, texture_unit),
2117                                  lp_build_const_int32(gallivm, 0), "");
2118
2119    if (dims >= 2) {
2120       size = LLVMBuildInsertElement(gallivm->builder, size,
2121                                     dynamic_state->height(dynamic_state, gallivm, texture_unit),
2122                                     lp_build_const_int32(gallivm, 1), "");
2123    }
2124
2125    if (dims >= 3) {
2126       size = LLVMBuildInsertElement(gallivm->builder, size,
2127                                     dynamic_state->depth(dynamic_state, gallivm, texture_unit),
2128                                     lp_build_const_int32(gallivm, 2), "");
2129    }
2130
2131    size = lp_build_minify(&bld_int_vec4, size, lod);
2132
2133    if (has_array)
2134       size = LLVMBuildInsertElement(gallivm->builder, size,
2135                                     dynamic_state->depth(dynamic_state, gallivm, texture_unit),
2136                                     lp_build_const_int32(gallivm, dims), "");
2137
2138    /*
2139     * d3d10 requires zero for x/y/z values (but not w, i.e. mip levels)
2140     * if level is out of bounds (note this can't cover unbound texture
2141     * here, which also requires returning zero).
2142     */
2143    if (explicit_lod && is_sviewinfo) {
2144       LLVMValueRef last_level, out, out1;
2145       struct lp_build_context leveli_bld;
2146
2147       /* everything is scalar for now */
2148       lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32));
2149       last_level = dynamic_state->last_level(dynamic_state, gallivm, texture_unit);
2150
2151       out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level);
2152       out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level);
2153       out = lp_build_or(&leveli_bld, out, out1);
2154       if (num_lods == 1) {
2155          out = lp_build_broadcast_scalar(&bld_int_vec4, out);
2156       }
2157       else {
2158          /* TODO */
2159          assert(0);
2160       }
2161       size = lp_build_andnot(&bld_int_vec4, size, out);
2162    }
2163    for (i = 0; i < dims + (has_array ? 1 : 0); i++) {
2164       sizes_out[i] = lp_build_extract_broadcast(gallivm, bld_int_vec4.type, int_type,
2165                                                 size,
2166                                                 lp_build_const_int32(gallivm, i));
2167    }
2168    if (is_sviewinfo) {
2169       for (; i < 4; i++) {
2170          sizes_out[i] = lp_build_const_vec(gallivm, int_type, 0.0);
2171       }
2172    }
2173
2174    /*
2175     * if there's no explicit_lod (buffers, rects) queries requiring nr of
2176     * mips would be illegal.
2177     */
2178    if (is_sviewinfo && explicit_lod) {
2179       struct lp_build_context bld_int_scalar;
2180       LLVMValueRef num_levels;
2181       lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32));
2182
2183       if (static_state->level_zero_only) {
2184          num_levels = bld_int_scalar.one;
2185       }
2186       else {
2187          LLVMValueRef last_level;
2188
2189          last_level = dynamic_state->last_level(dynamic_state, gallivm, texture_unit);
2190          num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level);
2191          num_levels = lp_build_add(&bld_int_scalar, num_levels, bld_int_scalar.one);
2192       }
2193       sizes_out[3] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, int_type),
2194                                         num_levels);
2195    }
2196 }