gallivm: fix an issue with NaNs with seamless cube filtering
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_soa.c
/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Texture sampling -- SoA.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 * @author Brian Paul <brianp@vmware.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_debug.h"
#include "util/u_dump.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_format.h"
#include "util/u_cpu_detect.h"
#include "util/format_rgb9e5.h"
#include "lp_bld_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_logic.h"
#include "lp_bld_printf.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_gather.h"
#include "lp_bld_format.h"
#include "lp_bld_sample.h"
#include "lp_bld_sample_aos.h"
#include "lp_bld_struct.h"
#include "lp_bld_quad.h"
#include "lp_bld_pack.h"
#include "lp_bld_intr.h"


/**
 * Generate code to fetch a texel from a texture at int coords (x, y, z).
 * The computation depends on whether the texture is 1D, 2D or 3D.
 * The result, texel, will be float vectors:
 *   texel[0] = red values
 *   texel[1] = green values
 *   texel[2] = blue values
 *   texel[3] = alpha values
 */
static void
lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
                          LLVMValueRef width,
                          LLVMValueRef height,
                          LLVMValueRef depth,
                          LLVMValueRef x,
                          LLVMValueRef y,
                          LLVMValueRef z,
                          LLVMValueRef y_stride,
                          LLVMValueRef z_stride,
                          LLVMValueRef data_ptr,
                          LLVMValueRef mipoffsets,
                          LLVMValueRef texel_out[4])
{
   const struct lp_static_sampler_state *static_state = bld->static_sampler_state;
   const unsigned dims = bld->dims;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef offset;
   LLVMValueRef i, j;
   LLVMValueRef use_border = NULL;

   /* use_border = x < 0 || x >= width || y < 0 || y >= height */
   if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
      use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
   }

   if (dims >= 2 &&
       lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
      if (use_border) {
         use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
         use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
      }
      else {
         use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
      }
   }

   if (dims == 3 &&
       lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
      if (use_border) {
         use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
         use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
      }
      else {
         use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
      }
   }

   /* convert x,y,z coords to linear offset from start of texture, in bytes */
   lp_build_sample_offset(&bld->int_coord_bld,
                          bld->format_desc,
                          x, y, z, y_stride, z_stride,
                          &offset, &i, &j);
   if (mipoffsets) {
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
   }
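
   /*
    * Conceptually, for a simple (non-compressed) format this computes
    * something like x * bytes_per_texel + y * y_stride + z * z_stride
    * (+ mip offset); i/j are the sub-block coords used by the fetch for
    * compressed formats.
    */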

   if (use_border) {
      /* If we can sample the border color, it means that texcoords may
       * lie outside the bounds of the texture image. We need to do
       * something to prevent reading out of bounds and causing a segfault.
       *
       * Simply AND the texture offset with !use_border. This will cause
       * out-of-bounds lanes to fetch from offset zero, which is guaranteed
       * to be inside the texture image.
       */
      offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
   }

   lp_build_fetch_rgba_soa(bld->gallivm,
                           bld->format_desc,
                           bld->texel_type, TRUE,
                           data_ptr, offset,
                           i, j,
                           bld->cache,
                           texel_out);

   /*
    * Note: if we find an app which frequently samples the texture border
    * we might want to implement a true conditional here to avoid sampling
    * the texture whenever possible (since that's quite a bit of code).
    * Ex:
    *   if (use_border) {
    *      texel = border_color;
    *   }
    *   else {
    *      texel = sample_texture(coord);
    *   }
    * As it is now, we always sample the texture, then selectively replace
    * the texel color results with the border color.
    */

   if (use_border) {
      /* select texel color or border color depending on use_border. */
      const struct util_format_description *format_desc = bld->format_desc;
      int chan;
      struct lp_type border_type = bld->texel_type;
      border_type.length = 4;
      /*
       * Only replace channels which are actually present. The others should
       * get optimized away eventually by the sampler_view swizzle anyway,
       * and it's easier this way too.
       */
      for (chan = 0; chan < 4; chan++) {
         unsigned chan_s;
         /* reverse-map channel... */
         for (chan_s = 0; chan_s < 4; chan_s++) {
            if (chan_s == format_desc->swizzle[chan]) {
               break;
            }
         }
         if (chan_s <= 3) {
            /* use the already clamped color */
            LLVMValueRef idx = lp_build_const_int32(bld->gallivm, chan);
            LLVMValueRef border_chan;

            border_chan = lp_build_extract_broadcast(bld->gallivm,
                                                     border_type,
                                                     bld->texel_type,
                                                     bld->border_color_clamped,
                                                     idx);
            texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
                                              border_chan, texel_out[chan]);
         }
      }
   }
}


/**
 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR_REPEAT mode.
 * (Note that with pot sizes we could do this much more easily post-scale
 * with some bit arithmetic.)
 */
static LLVMValueRef
lp_build_coord_mirror(struct lp_build_sample_context *bld,
                      LLVMValueRef coord, boolean posOnly)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   LLVMValueRef fract;
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);

   /*
    * We can just use 2*(x - round(0.5*x)) to do all the mirroring;
    * it all works out. (The result is in range [-1.0, 1.0], negative if
    * the coord is in the "odd" section, otherwise positive.)
    */

   coord = lp_build_mul(coord_bld, coord, half);
   fract = lp_build_round(coord_bld, coord);
   fract = lp_build_sub(coord_bld, coord, fract);
   coord = lp_build_add(coord_bld, fract, fract);

   if (posOnly) {
      /*
       * Theoretically it's not quite 100% accurate because the spec says
       * that ultimately a scaled coord of -x.0 should map to int coord
       * -x + 1 with mirroring, not -x (this does not matter for bilinear
       * filtering).
       */
      coord = lp_build_abs(coord_bld, coord);
      /* kill off NaNs */
      /* XXX: not safe without arch rounding, fract can be anything. */
      coord = lp_build_max_ext(coord_bld, coord, coord_bld->zero,
                               GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
   }

   return coord;
}

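/*
 * A scalar sketch of the computation above, for illustration only
 * (hypothetical helper, not part of this file):
 *
 *    float mirror(float x)
 *    {
 *       x *= 0.5f;
 *       x = 2.0f * (x - roundf(x));  // in [-1,1], negative in "odd" section
 *       return fabsf(x);             // posOnly: fold into [0,1]
 *    }
 *
 * E.g. coord 1.25: 0.625 -> 2 * (0.625 - 1.0) = -0.75 -> abs = 0.75,
 * the expected mirrored coord.
 */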

/**
 * Helper to compute the first coord and the weight for
 * linear wrap repeat npot textures
 */
void
lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
                                  LLVMValueRef coord_f,
                                  LLVMValueRef length_i,
                                  LLVMValueRef length_f,
                                  LLVMValueRef *coord0_i,
                                  LLVMValueRef *weight_f)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
                                                int_coord_bld->one);
   LLVMValueRef mask;
   /* wrap with normalized floats is just fract */
   coord_f = lp_build_fract(coord_bld, coord_f);
   /* mul by size and subtract 0.5 */
   coord_f = lp_build_mul(coord_bld, coord_f, length_f);
   coord_f = lp_build_sub(coord_bld, coord_f, half);
   /*
    * we avoided the 0.5/length division before the repeat wrap,
    * now need to fix up edge cases with selects
    */
   /*
    * Note we do a float (unordered) compare so we can eliminate NaNs.
    * (Otherwise we would need fract_safe above.)
    */
   mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
                           PIPE_FUNC_LESS, coord_f, coord_bld->zero);

   /* convert to int, compute lerp weight */
   lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
   *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
}

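/*
 * Scalar equivalent of the above, for illustration (assuming a finite
 * normalized coord; names are hypothetical):
 *
 *    float u = fract(coord_f) * length_f - 0.5f;
 *    int   coord0 = (int)floorf(u);
 *    float weight = u - floorf(u);
 *    if (u < 0.0f)
 *       coord0 = length_i - 1;   // wrap left of texel 0 to the last texel
 *
 * E.g. length 3, coord 0.1: u = -0.2, weight = 0.8, coord0 wraps to 2,
 * so the caller blends texel 2 with texel 0 (after its own +1 wrap).
 */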

/**
 * Build LLVM code for texture wrap mode for linear filtering.
 * \param x0_out returns first integer texcoord
 * \param x1_out returns second integer texcoord
 * \param weight_out returns linear interpolation weight
 */
static void
lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
                            boolean is_gather,
                            LLVMValueRef coord,
                            LLVMValueRef length,
                            LLVMValueRef length_f,
                            LLVMValueRef offset,
                            boolean is_pot,
                            unsigned wrap_mode,
                            LLVMValueRef *x0_out,
                            LLVMValueRef *x1_out,
                            LLVMValueRef *weight_out)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
   LLVMValueRef coord0, coord1, weight;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         /* mul by size and subtract 0.5 */
         coord = lp_build_mul(coord_bld, coord, length_f);
         coord = lp_build_sub(coord_bld, coord, half);
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         /* repeat wrap */
         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
         coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
      }
      else {
         LLVMValueRef mask;
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length_f);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         lp_build_coord_repeat_npot_linear(bld, coord,
                                           length, length_f,
                                           &coord0, &weight);
         mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                                 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
         coord1 = LLVMBuildAnd(builder,
                               lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
                               mask, "");
      }
      break;

   case PIPE_TEX_WRAP_CLAMP:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }

      /*
       * clamp to [0, length]
       *
       * Unlike some other wrap modes, this should be correct for gather
       * too. GL_CLAMP explicitly does this clamp on the coord prior to
       * actual wrapping (which is per sample).
       */
      coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);

      coord = lp_build_sub(coord_bld, coord, half);

      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      {
         struct lp_build_context abs_coord_bld = bld->coord_bld;
         abs_coord_bld.type.sign = FALSE;

         if (bld->static_sampler_state->normalized_coords) {
            /* mul by tex size */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }

         /* clamp to length max */
         coord = lp_build_min_ext(coord_bld, coord, length_f,
                                  GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
         if (!is_gather) {
            /* subtract 0.5 */
            coord = lp_build_sub(coord_bld, coord, half);
            /* clamp to [0, length - 0.5] */
            coord = lp_build_max(coord_bld, coord, coord_bld->zero);
            /* convert to int, compute lerp weight */
            lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
            coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         } else {
            /*
             * The non-gather path will end up with coords 0, 1 if coord was
             * smaller than 0.5 (with corresponding weight 0.0 so it doesn't
             * really matter what the second coord is). But for gather, we
             * really need to end up with coords 0, 0.
             */
            coord = lp_build_max(coord_bld, coord, coord_bld->zero);
            coord0 = lp_build_sub(coord_bld, coord, half);
            coord1 = lp_build_add(coord_bld, coord, half);
            /* values range in ([-0.5, length_f - 0.5], [0.5, length_f + 0.5]) */
            coord0 = lp_build_itrunc(coord_bld, coord0);
            coord1 = lp_build_itrunc(coord_bld, coord1);
            weight = coord_bld->undef;
         }
         /* coord1 = min(coord1, length-1) */
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
         break;
      }

   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /*
       * We don't need any clamp. Technically, for very large (pos or neg)
       * (or infinite) values, clamp against [-length, length] would be
       * correct, but we don't need to guarantee any specific
       * result for such coords (the ifloor will be undefined, but for modes
       * requiring border all resulting coords are safe).
       */
      coord = lp_build_sub(coord_bld, coord, half);
      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_MIRROR_REPEAT:
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         offset = lp_build_div(coord_bld, offset, length_f);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      if (!is_gather) {
         /* compute mirror function */
         coord = lp_build_coord_mirror(bld, coord, TRUE);

         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
         coord = lp_build_sub(coord_bld, coord, half);

         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);

         /* coord0 = max(coord0, 0) */
         coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
         /* coord1 = min(coord1, length-1) */
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
      } else {
         /*
          * This is pretty reasonable in the end, all the tests care
          * about are nasty edge cases (scaled coords x.5, so the individual
          * coords are actually integers, which is REALLY tricky to get right
          * due to this working differently both for negative numbers as well
          * as for even/odd cases). But with enough magic it's not too complex
          * after all.
          * Maybe we should try a bit-arithmetic version for POT textures
          * though...
          */
         LLVMValueRef isNeg;
         /*
          * Wrapping just once still works, even though it means we can
          * get "wrong" sign due to performing mirror in the middle of the
          * two coords (because this can only happen very near the odd/even
          * edges, so both coords will actually end up as 0 or length - 1
          * in the end).
          * For GL4 gather with per-sample offsets we'd need to do the
          * mirroring per coord too.
          */
         coord = lp_build_coord_mirror(bld, coord, FALSE);
         coord = lp_build_mul(coord_bld, coord, length_f);

         /*
          * NaNs should be safe here, we'll do away with them with
          * the ones' complement plus min.
          */
         coord0 = lp_build_sub(coord_bld, coord, half);
         coord0 = lp_build_ifloor(coord_bld, coord0);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         /* ones' complement for neg numbers (mirror(negX) = X - 1) */
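         /* (e.g. coord0 = -1 -> ~(-1) = 0, coord0 = -2 -> ~(-2) = 1) */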
         isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS,
                              coord0, int_coord_bld->zero);
         coord0 = lp_build_xor(int_coord_bld, coord0, isNeg);
         isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS,
                              coord1, int_coord_bld->zero);
         coord1 = lp_build_xor(int_coord_bld, coord1, isNeg);
         coord0 = lp_build_min(int_coord_bld, coord0, length_minus_one);
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);

         weight = coord_bld->undef;
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /*
       * XXX: probably not correct for gather, albeit I'm not
       * entirely sure as it's poorly specified. The wrapping looks
       * correct according to the GL 1.2.1 spec; however, negative values
       * will be swapped - GL re-specified wrapping in newer versions
       * (no more pre-clamp except with GL_CLAMP).
       */
      coord = lp_build_abs(coord_bld, coord);

      /* clamp to [0, length] */
      coord = lp_build_min_ext(coord_bld, coord, length_f,
                               GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);

      coord = lp_build_sub(coord_bld, coord, half);

      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      {
         struct lp_build_context abs_coord_bld = bld->coord_bld;
         abs_coord_bld.type.sign = FALSE;

         if (bld->static_sampler_state->normalized_coords) {
            /* scale coord to length */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         if (!is_gather) {
            coord = lp_build_abs(coord_bld, coord);

            /* clamp to length max */
            coord = lp_build_min_ext(coord_bld, coord, length_f,
                                     GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
            /* subtract 0.5 */
            coord = lp_build_sub(coord_bld, coord, half);
            /* clamp to [0, length - 0.5] */
            coord = lp_build_max(coord_bld, coord, coord_bld->zero);

            /* convert to int, compute lerp weight */
            lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
            coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
            /* coord1 = min(coord1, length-1) */
            coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
         } else {
            /*
             * The non-gather path will swap coord0/1 if coord was negative,
             * which is ok for filtering since the filter weight matches
             * accordingly. Also, if coord is close to zero, coord0/1 will
             * be 0 and 1, instead of 0 and 0 (again ok due to filter
             * weight being 0.0). Both issues need to be fixed for gather.
             */
            LLVMValueRef isNeg;

            /*
             * Actually wanted to cheat here and use:
             * coord1 = lp_build_iround(coord_bld, coord);
             * but it's not good enough for some tests (even piglit
             * textureGather is set up in a way so the coords are always
             * .5, that is right at the crossover points).
             * So do ordinary sub/floor, then do ones' complement
             * for negative numbers.
             * (Note we can't just do sub|add/abs/itrunc per coord either -
             * because the spec demands that mirror(3.0) = 3 but
             * mirror(-3.0) = 2.)
             */
            coord = lp_build_sub(coord_bld, coord, half);
            coord0 = lp_build_ifloor(coord_bld, coord);
            coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
            isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, coord0,
                                 int_coord_bld->zero);
            coord0 = lp_build_xor(int_coord_bld, isNeg, coord0);
            coord0 = lp_build_min(int_coord_bld, coord0, length_minus_one);

            isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, coord1,
                                 int_coord_bld->zero);
            coord1 = lp_build_xor(int_coord_bld, isNeg, coord1);
            coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);

            weight = coord_bld->undef;
         }
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      {
         if (bld->static_sampler_state->normalized_coords) {
            /* scale coord to length */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /*
          * XXX: probably not correct for gather due to swapped
          * order if coord is negative (same rationale as for
          * MIRROR_CLAMP).
          */
         coord = lp_build_abs(coord_bld, coord);

         /*
          * We don't need any clamp. Technically, for very large
          * (or infinite) values, clamp against length would be
          * correct, but we don't need to guarantee any specific
          * result for such coords (the ifloor will be undefined, but
          * for modes requiring border all resulting coords are safe).
          */
         coord = lp_build_sub(coord_bld, coord, half);

         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      }
      break;

   default:
      assert(0);
      coord0 = NULL;
      coord1 = NULL;
      weight = NULL;
   }

   *x0_out = coord0;
   *x1_out = coord1;
   *weight_out = weight;
}

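/*
 * Scalar sketch of the CLAMP_TO_EDGE branch above, for illustration
 * (assuming normalized coords, no offset, non-gather):
 *
 *    u = fminf(coord * length_f, length_f);   // C99 fminf picks the
 *                                             // non-NaN operand too
 *    u = fmaxf(u - 0.5f, 0.0f);               // now in [0, length - 0.5]
 *    coord0 = (int)floorf(u);
 *    weight = u - coord0;
 *    coord1 = MIN2(coord0 + 1, length - 1);
 *
 * The NaN-selecting min is what lp_build_min_ext with
 * GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN requests here.
 */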

/**
 * Build LLVM code for texture wrap mode for nearest filtering.
 * \param coord the incoming texcoord (nominally in [0,1])
 * \param length the texture size along one dimension, as int vector
 * \param length_f the texture size along one dimension, as float vector
 * \param offset texel offset along one dimension (as int vector)
 * \param is_pot if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 */
static LLVMValueRef
lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
                             LLVMValueRef coord,
                             LLVMValueRef length,
                             LLVMValueRef length_f,
                             LLVMValueRef offset,
                             boolean is_pot,
                             unsigned wrap_mode)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
   LLVMValueRef icoord;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         coord = lp_build_mul(coord_bld, coord, length_f);
         icoord = lp_build_ifloor(coord_bld, coord);
         if (offset) {
            icoord = lp_build_add(int_coord_bld, icoord, offset);
         }
         icoord = LLVMBuildAnd(builder, icoord, length_minus_one, "");
      }
      else {
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length_f);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* take fraction, unnormalize */
         coord = lp_build_fract_safe(coord_bld, coord);
         coord = lp_build_mul(coord_bld, coord, length_f);
         icoord = lp_build_itrunc(coord_bld, coord);
      }
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }

      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* floor */
      /* use itrunc instead since we clamp to 0 anyway */
      icoord = lp_build_itrunc(coord_bld, coord);

      /* clamp to [0, length - 1]. */
      icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
                              length_minus_one);
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      /* no clamp necessary, border masking will handle this */
      icoord = lp_build_ifloor(coord_bld, coord);
      if (offset) {
         icoord = lp_build_add(int_coord_bld, icoord, offset);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_REPEAT:
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         offset = lp_build_div(coord_bld, offset, length_f);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* compute mirror function */
      coord = lp_build_coord_mirror(bld, coord, TRUE);

      /* scale coord to length */
      assert(bld->static_sampler_state->normalized_coords);
      coord = lp_build_mul(coord_bld, coord, length_f);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);

      /* clamp to [0, length - 1] */
      icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      coord = lp_build_abs(coord_bld, coord);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);
      /*
       * Use unsigned min due to possible undef values (NaNs, overflow)
       */
      {
         struct lp_build_context abs_coord_bld = *int_coord_bld;
         abs_coord_bld.type.sign = FALSE;
         /* clamp to [0, length - 1] */
         icoord = lp_build_min(&abs_coord_bld, icoord, length_minus_one);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      coord = lp_build_abs(coord_bld, coord);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);
      break;

   default:
      assert(0);
      icoord = NULL;
   }

   return icoord;
}

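/*
 * For the POT repeat case above this boils down to, per element:
 *
 *    icoord = ((int)floorf(coord * length_f) + offset) & (length - 1);
 *
 * E.g. length 8, coord 1.3, no offset: floor(10.4) = 10, and 10 & 7 = 2,
 * the same texel as for coord 0.3.
 */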

/**
 * Do shadow test/comparison.
 * \param p shadow ref value
 * \param texel the texel to compare against
 */
static LLVMValueRef
lp_build_sample_comparefunc(struct lp_build_sample_context *bld,
                            LLVMValueRef p,
                            LLVMValueRef texel)
{
   struct lp_build_context *texel_bld = &bld->texel_bld;
   LLVMValueRef res;

   if (0) {
      //lp_build_print_value(bld->gallivm, "shadow cmp coord", p);
      lp_build_print_value(bld->gallivm, "shadow cmp texel", texel);
   }

   /* result = (p FUNC texel) ? 1 : 0 */
   /*
    * honor d3d10 floating point rules here, which state that comparisons
    * are ordered except NOT_EQUAL which is unordered.
    */
   if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) {
      res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func,
                                 p, texel);
   }
   else {
      res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func,
                         p, texel);
   }
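   /*
    * E.g. with a NaN texel, the ordered LESS compare yields false while
    * the unordered NOTEQUAL compare yields true, matching the d3d10 rules
    * mentioned above. The result is a 0 / ~0 per-element mask.
    */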
   return res;
}


/**
 * Generate code to sample a mipmap level with nearest filtering.
 * If sampling a cube texture, r = cube face in [0,5].
 */
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                              LLVMValueRef size,
                              LLVMValueRef row_stride_vec,
                              LLVMValueRef img_stride_vec,
                              LLVMValueRef data_ptr,
                              LLVMValueRef mipoffsets,
                              LLVMValueRef *coords,
                              const LLVMValueRef *offsets,
                              LLVMValueRef colors_out[4])
{
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec;
   LLVMValueRef height_vec;
   LLVMValueRef depth_vec;
   LLVMValueRef flt_size;
   LLVMValueRef flt_width_vec;
   LLVMValueRef flt_height_vec;
   LLVMValueRef flt_depth_vec;
   LLVMValueRef x, y = NULL, z = NULL;

   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                size,
                                &width_vec, &height_vec, &depth_vec);

   flt_size = lp_build_int_to_float(&bld->float_size_bld, size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &flt_width_vec, &flt_height_vec, &flt_depth_vec);

   /*
    * Compute integer texcoords.
    */
   x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec,
                                    flt_width_vec, offsets[0],
                                    bld->static_texture_state->pot_width,
                                    bld->static_sampler_state->wrap_s);
   lp_build_name(x, "tex.x.wrapped");

   if (dims >= 2) {
      y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec,
                                       flt_height_vec, offsets[1],
                                       bld->static_texture_state->pot_height,
                                       bld->static_sampler_state->wrap_t);
      lp_build_name(y, "tex.y.wrapped");

      if (dims == 3) {
         z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec,
                                          flt_depth_vec, offsets[2],
                                          bld->static_texture_state->pot_depth,
                                          bld->static_sampler_state->wrap_r);
         lp_build_name(z, "tex.z.wrapped");
      }
   }
   if (has_layer_coord(bld->static_texture_state->target)) {
      if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
         /* add cube layer to face */
         z = lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
      }
      else {
         z = coords[2];
      }
      lp_build_name(z, "tex.z.layer");
   }

   /*
    * Get texture colors.
    */
   lp_build_sample_texel_soa(bld,
                             width_vec, height_vec, depth_vec,
                             x, y, z,
                             row_stride_vec, img_stride_vec,
                             data_ptr, mipoffsets, colors_out);

   if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
      LLVMValueRef cmpval;
      cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]);
      /* this is really just an AND of 1.0 with cmpval, but llvm is clever enough */
      colors_out[0] = lp_build_select(&bld->texel_bld, cmpval,
                                      bld->texel_bld.one, bld->texel_bld.zero);
      colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
   }

}


/**
 * Like a lerp, but inputs are 0/~0 masks, so can simplify slightly.
 */
static LLVMValueRef
lp_build_masklerp(struct lp_build_context *bld,
                  LLVMValueRef weight,
                  LLVMValueRef mask0,
                  LLVMValueRef mask1)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef weight2;

   weight2 = lp_build_sub(bld, bld->one, weight);
   weight = LLVMBuildBitCast(builder, weight,
                             lp_build_int_vec_type(gallivm, bld->type), "");
   weight2 = LLVMBuildBitCast(builder, weight2,
                              lp_build_int_vec_type(gallivm, bld->type), "");
   weight = LLVMBuildAnd(builder, weight, mask1, "");
   weight2 = LLVMBuildAnd(builder, weight2, mask0, "");
   weight = LLVMBuildBitCast(builder, weight, bld->vec_type, "");
   weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, "");
   return lp_build_add(bld, weight, weight2);
}

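/*
 * I.e. with m0/m1 being all-0 or all-1 bit patterns this computes
 *
 *    (weight & m1) + ((1 - weight) & m0)
 *
 * which equals lerp(weight, m0 ? 1 : 0, m1 ? 1 : 0) without any
 * multiplies.
 */
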
/**
 * Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly.
 */
static LLVMValueRef
lp_build_masklerp2d(struct lp_build_context *bld,
                    LLVMValueRef weight0,
                    LLVMValueRef weight1,
                    LLVMValueRef mask00,
                    LLVMValueRef mask01,
                    LLVMValueRef mask10,
                    LLVMValueRef mask11)
{
   LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01);
   LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11);
   return lp_build_lerp(bld, weight1, val0, val1, 0);
}

/*
 * This is a fair bit of code for something OpenGL just recommends
 * but does not require.
 */
#define ACCURATE_CUBE_CORNERS 1

/**
 * Generate code to sample a mipmap level with linear filtering.
 * If sampling a cube texture, r = cube face in [0,5].
 * If linear_mask is present, only pixels having their mask set
 * will receive linear filtering, the rest will use nearest.
 */
static void
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                             boolean is_gather,
                             LLVMValueRef size,
                             LLVMValueRef linear_mask,
                             LLVMValueRef row_stride_vec,
                             LLVMValueRef img_stride_vec,
                             LLVMValueRef data_ptr,
                             LLVMValueRef mipoffsets,
                             LLVMValueRef *coords,
                             const LLVMValueRef *offsets,
                             LLVMValueRef colors_out[4])
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context *ivec_bld = &bld->int_coord_bld;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *texel_bld = &bld->texel_bld;
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec;
   LLVMValueRef height_vec;
   LLVMValueRef depth_vec;
   LLVMValueRef flt_size;
   LLVMValueRef flt_width_vec;
   LLVMValueRef flt_height_vec;
   LLVMValueRef flt_depth_vec;
   LLVMValueRef fall_off[4], have_corners;
   LLVMValueRef z1 = NULL;
   LLVMValueRef z00 = NULL, z01 = NULL, z10 = NULL, z11 = NULL;
   LLVMValueRef x00 = NULL, x01 = NULL, x10 = NULL, x11 = NULL;
   LLVMValueRef y00 = NULL, y01 = NULL, y10 = NULL, y11 = NULL;
   LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL;
   LLVMValueRef xs[4], ys[4], zs[4];
   LLVMValueRef neighbors[2][2][4];
   int chan, texel_index;
   boolean seamless_cube_filter, accurate_cube_corners;

   seamless_cube_filter = (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
                           bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
                          bld->static_sampler_state->seamless_cube_map;
   /*
    * XXX I don't know how this is really supposed to work with gather. From GL
    * spec wording (not gather specific) it sounds like the 4th missing texel
    * should be an average of the other 3, hence gather could return this.
    * This is however NOT how the code here works, which just fixes up the
    * weights used for filtering instead. And of course for gather there is
    * no filter to tweak...
    */
   accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter &&
                           !is_gather;

   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                size,
                                &width_vec, &height_vec, &depth_vec);

   flt_size = lp_build_int_to_float(&bld->float_size_bld, size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &flt_width_vec, &flt_height_vec, &flt_depth_vec);

   /*
    * Compute integer texcoords.
    */

   if (!seamless_cube_filter) {
      lp_build_sample_wrap_linear(bld, is_gather, coords[0], width_vec,
                                  flt_width_vec, offsets[0],
                                  bld->static_texture_state->pot_width,
                                  bld->static_sampler_state->wrap_s,
                                  &x00, &x01, &s_fpart);
      lp_build_name(x00, "tex.x0.wrapped");
      lp_build_name(x01, "tex.x1.wrapped");
      x10 = x00;
      x11 = x01;

      if (dims >= 2) {
         lp_build_sample_wrap_linear(bld, is_gather, coords[1], height_vec,
                                     flt_height_vec, offsets[1],
                                     bld->static_texture_state->pot_height,
                                     bld->static_sampler_state->wrap_t,
                                     &y00, &y10, &t_fpart);
         lp_build_name(y00, "tex.y0.wrapped");
         lp_build_name(y10, "tex.y1.wrapped");
         y01 = y00;
         y11 = y10;

         if (dims == 3) {
            lp_build_sample_wrap_linear(bld, is_gather, coords[2], depth_vec,
                                        flt_depth_vec, offsets[2],
                                        bld->static_texture_state->pot_depth,
                                        bld->static_sampler_state->wrap_r,
                                        &z00, &z1, &r_fpart);
            z01 = z10 = z11 = z00;
            lp_build_name(z00, "tex.z0.wrapped");
            lp_build_name(z1, "tex.z1.wrapped");
         }
      }
      if (has_layer_coord(bld->static_texture_state->target)) {
         if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
            /* add cube layer to face */
            z00 = z01 = z10 = z11 = z1 =
               lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
         }
         else {
            z00 = z01 = z10 = z11 = z1 = coords[2];  /* cube face or layer */
         }
         lp_build_name(z00, "tex.z0.layer");
         lp_build_name(z1, "tex.z1.layer");
      }
   }
   else {
      struct lp_build_if_state edge_if;
      LLVMTypeRef int1t;
      LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2];
      LLVMValueRef coord, have_edge, have_corner;
      LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp, fall_off_x, fall_off_y;
      LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp;
      LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped;
      LLVMValueRef face = coords[2];
      LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5f);
      LLVMValueRef length_minus_one = lp_build_sub(ivec_bld, width_vec, ivec_bld->one);
      /* XXX drop height calcs. Could (should) do this without seamless filtering too */
      height_vec = width_vec;
      flt_height_vec = flt_width_vec;

      /* XXX the overflow logic is actually sort of duplicated with trilinear,
       * since an overflow in one mip should also have a corresponding overflow
       * in another.
       */
      /* should always have normalized coords, and offsets are undefined */
      assert(bld->static_sampler_state->normalized_coords);
      /*
       * The coords should all be between [0, 1]; however, we can have NaNs,
       * which will wreak havoc. In particular the y1_clamped value below
       * can be -INT_MAX (on x86) and be propagated right through (probably
       * other values might be bogus in the end too).
       * So kill off the NaNs here.
       */
      coords[0] = lp_build_max_ext(coord_bld, coords[0], coord_bld->zero,
                                   GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
      coords[1] = lp_build_max_ext(coord_bld, coords[1], coord_bld->zero,
                                   GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
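      /*
       * lp_build_max_ext with GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN
       * returns the second (known non-NaN) operand when the first is NaN,
       * i.e. per element this behaves like:
       *    coord = isnan(coord) ? 0.0f : fmaxf(coord, 0.0f);
       */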
      coord = lp_build_mul(coord_bld, coords[0], flt_width_vec);
      /* instead of clamp, build mask if overflowed */
      coord = lp_build_sub(coord_bld, coord, half);
      /* convert to int, compute lerp weight */
      /* not ideal with AVX (and no AVX2) */
      lp_build_ifloor_fract(coord_bld, coord, &x0, &s_fpart);
      x1 = lp_build_add(ivec_bld, x0, ivec_bld->one);
      coord = lp_build_mul(coord_bld, coords[1], flt_height_vec);
      coord = lp_build_sub(coord_bld, coord, half);
      lp_build_ifloor_fract(coord_bld, coord, &y0, &t_fpart);
      y1 = lp_build_add(ivec_bld, y0, ivec_bld->one);

      fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero);
      fall_off[1] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, x1, length_minus_one);
      fall_off[2] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, y0, ivec_bld->zero);
      fall_off[3] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, y1, length_minus_one);

      fall_off_x = lp_build_or(ivec_bld, fall_off[0], fall_off[1]);
      fall_off_y = lp_build_or(ivec_bld, fall_off[2], fall_off[3]);
      have_edge = lp_build_or(ivec_bld, fall_off_x, fall_off_y);
      have_edge = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_edge);

      /* needed for accurate corner filtering branch later, rely on 0 init */
      int1t = LLVMInt1TypeInContext(bld->gallivm->context);
      have_corners = lp_build_alloca(bld->gallivm, int1t, "have_corner");

      for (texel_index = 0; texel_index < 4; texel_index++) {
         xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs");
         ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys");
         zs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "zs");
      }

      lp_build_if(&edge_if, bld->gallivm, have_edge);

      have_corner = lp_build_and(ivec_bld, fall_off_x, fall_off_y);
      have_corner = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_corner);
      LLVMBuildStore(builder, have_corner, have_corners);

      /*
       * Need to feed clamped values here for cheap corner handling,
       * but only for y coord (as when falling off both edges we only
       * fall off the x one) - this should be sufficient.
       */
      y0_clamped = lp_build_max(ivec_bld, y0, ivec_bld->zero);
      y1_clamped = lp_build_min(ivec_bld, y1, length_minus_one);

      /*
       * Get all possible new coords.
       */
      lp_build_cube_new_coords(ivec_bld, face,
                               x0, x1, y0_clamped, y1_clamped,
                               length_minus_one,
                               new_faces, new_xcoords, new_ycoords);

      /* handle fall off x-, x+ direction */
      /* determine new coords, face (the two fall_off vars cannot both be true at the same time) */
      x00 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][0], x0);
      y00 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][0], y0_clamped);
      x10 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][1], x0);
      y10 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][1], y1_clamped);
      x01 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][0], x1);
      y01 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][0], y0_clamped);
      x11 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][1], x1);
      y11 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][1], y1_clamped);

      z00 = z10 = lp_build_select(ivec_bld, fall_off[0], new_faces[0], face);
      z01 = z11 = lp_build_select(ivec_bld, fall_off[1], new_faces[1], face);

      /* handle fall off y-, y+ direction */
      /*
       * Cheap corner logic: just hack up things so a texel doesn't fall
       * off both sides (which means filter weights will be wrong but we'll only
       * use valid texels in the filter).
       * This means however (y) coords must additionally be clamped (see above).
       * This corner handling should be fully OpenGL (but not d3d10) compliant.
       */
      fall_off_ym_notxm = lp_build_andnot(ivec_bld, fall_off[2], fall_off[0]);
      fall_off_ym_notxp = lp_build_andnot(ivec_bld, fall_off[2], fall_off[1]);
      fall_off_yp_notxm = lp_build_andnot(ivec_bld, fall_off[3], fall_off[0]);
      fall_off_yp_notxp = lp_build_andnot(ivec_bld, fall_off[3], fall_off[1]);

      x00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_xcoords[2][0], x00);
      y00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_ycoords[2][0], y00);
      x01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_xcoords[2][1], x01);
      y01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_ycoords[2][1], y01);
      x10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_xcoords[3][0], x10);
      y10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_ycoords[3][0], y10);
      x11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_xcoords[3][1], x11);
      y11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_ycoords[3][1], y11);

      z00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_faces[2], z00);
      z01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_faces[2], z01);
      z10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_faces[3], z10);
      z11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_faces[3], z11);

      if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
         /* now can add cube layer to face (per sample) */
         z00 = lp_build_add(ivec_bld, z00, coords[3]);
         z01 = lp_build_add(ivec_bld, z01, coords[3]);
         z10 = lp_build_add(ivec_bld, z10, coords[3]);
         z11 = lp_build_add(ivec_bld, z11, coords[3]);
      }

      LLVMBuildStore(builder, x00, xs[0]);
      LLVMBuildStore(builder, x01, xs[1]);
      LLVMBuildStore(builder, x10, xs[2]);
      LLVMBuildStore(builder, x11, xs[3]);
      LLVMBuildStore(builder, y00, ys[0]);
      LLVMBuildStore(builder, y01, ys[1]);
      LLVMBuildStore(builder, y10, ys[2]);
      LLVMBuildStore(builder, y11, ys[3]);
      LLVMBuildStore(builder, z00, zs[0]);
      LLVMBuildStore(builder, z01, zs[1]);
      LLVMBuildStore(builder, z10, zs[2]);
      LLVMBuildStore(builder, z11, zs[3]);

      lp_build_else(&edge_if);

      LLVMBuildStore(builder, x0, xs[0]);
      LLVMBuildStore(builder, x1, xs[1]);
      LLVMBuildStore(builder, x0, xs[2]);
      LLVMBuildStore(builder, x1, xs[3]);
      LLVMBuildStore(builder, y0, ys[0]);
      LLVMBuildStore(builder, y0, ys[1]);
      LLVMBuildStore(builder, y1, ys[2]);
      LLVMBuildStore(builder, y1, ys[3]);
      if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
         LLVMValueRef cube_layer = lp_build_add(ivec_bld, face, coords[3]);
         LLVMBuildStore(builder, cube_layer, zs[0]);
         LLVMBuildStore(builder, cube_layer, zs[1]);
         LLVMBuildStore(builder, cube_layer, zs[2]);
         LLVMBuildStore(builder, cube_layer, zs[3]);
      }
      else {
         LLVMBuildStore(builder, face, zs[0]);
         LLVMBuildStore(builder, face, zs[1]);
         LLVMBuildStore(builder, face, zs[2]);
         LLVMBuildStore(builder, face, zs[3]);
      }

      lp_build_endif(&edge_if);

      x00 = LLVMBuildLoad(builder, xs[0], "");
      x01 = LLVMBuildLoad(builder, xs[1], "");
      x10 = LLVMBuildLoad(builder, xs[2], "");
      x11 = LLVMBuildLoad(builder, xs[3], "");
      y00 = LLVMBuildLoad(builder, ys[0], "");
      y01 = LLVMBuildLoad(builder, ys[1], "");
      y10 = LLVMBuildLoad(builder, ys[2], "");
      y11 = LLVMBuildLoad(builder, ys[3], "");
      z00 = LLVMBuildLoad(builder, zs[0], "");
      z01 = LLVMBuildLoad(builder, zs[1], "");
      z10 = LLVMBuildLoad(builder, zs[2], "");
      z11 = LLVMBuildLoad(builder, zs[3], "");
   }

   if (linear_mask) {
      /*
       * Whack filter weights into place. Whatever texel had more weight is
       * the one which should have been selected by nearest filtering, hence
       * just use 100% weight for it.
       */
      struct lp_build_context *c_bld = &bld->coord_bld;
      LLVMValueRef w1_mask, w1_weight;
      LLVMValueRef half = lp_build_const_vec(bld->gallivm, c_bld->type, 0.5f);

      w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, s_fpart, half);
      /* this select is really just an "and" */
      w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
      s_fpart = lp_build_select(c_bld, linear_mask, s_fpart, w1_weight);
      if (dims >= 2) {
         w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, t_fpart, half);
         w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
         t_fpart = lp_build_select(c_bld, linear_mask, t_fpart, w1_weight);
         if (dims == 3) {
            w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, r_fpart, half);
            w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
            r_fpart = lp_build_select(c_bld, linear_mask, r_fpart, w1_weight);
         }
      }
   }
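   /*
    * I.e. for lanes with linear_mask unset the weight snaps to 0.0 or 1.0
    * (whichever texel is nearer), so the lerps below degenerate to a
    * nearest-neighbor select for those lanes.
    */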

   /*
    * Get texture colors.
    */
   /* get x0/x1 texels */
   lp_build_sample_texel_soa(bld,
                             width_vec, height_vec, depth_vec,
                             x00, y00, z00,
                             row_stride_vec, img_stride_vec,
                             data_ptr, mipoffsets, neighbors[0][0]);
   lp_build_sample_texel_soa(bld,
                             width_vec, height_vec, depth_vec,
                             x01, y01, z01,
                             row_stride_vec, img_stride_vec,
                             data_ptr, mipoffsets, neighbors[0][1]);

   if (dims == 1) {
      assert(!is_gather);
      if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
         /* Interpolate two samples from 1D image to produce one color */
         for (chan = 0; chan < 4; chan++) {
            colors_out[chan] = lp_build_lerp(texel_bld, s_fpart,
                                             neighbors[0][0][chan],
                                             neighbors[0][1][chan],
                                             0);
         }
      }
      else {
         LLVMValueRef cmpval0, cmpval1;
         cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
         cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
         /* simplified lerp, AND mask with weight and add */
         colors_out[0] = lp_build_masklerp(texel_bld, s_fpart,
                                           cmpval0, cmpval1);
         colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
      }
   }
   else {
      /* 2D/3D texture */
      struct lp_build_if_state corner_if;
      LLVMValueRef colors0[4], colorss[4];

      /* get x0/x1 texels at y1 */
      lp_build_sample_texel_soa(bld,
                                width_vec, height_vec, depth_vec,
                                x10, y10, z10,
                                row_stride_vec, img_stride_vec,
                                data_ptr, mipoffsets, neighbors[1][0]);
      lp_build_sample_texel_soa(bld,
                                width_vec, height_vec, depth_vec,
                                x11, y11, z11,
                                row_stride_vec, img_stride_vec,
                                data_ptr, mipoffsets, neighbors[1][1]);

      /*
       * To avoid having to duplicate the linear_mask / fetch code, use
       * another branch here (with the corner condition, though edge would
       * work as well).
       */
      if (accurate_cube_corners) {
         LLVMValueRef w00, w01, w10, w11, wx0, wy0;
         LLVMValueRef c_weight, c00, c01, c10, c11;
         LLVMValueRef have_corner, one_third, tmp;

         colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
         colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
         colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
         colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");

         have_corner = LLVMBuildLoad(builder, have_corners, "");

         lp_build_if(&corner_if, bld->gallivm, have_corner);

         /*
          * We can't use the standard 2d lerp as we need per-element weights
          * in case of corners, so just calculate the bilinear result as
          * w00*s00 + w01*s01 + w10*s10 + w11*s11.
          * (This is actually less work than a 2d lerp, 7 vs. 9 instructions;
          * however, calculating the weights needs another 6. Still, it is
          * probably no slower than a 2d lerp even for just 4 channels, since
          * the weights only need to be calculated once - though fixing up
          * the weights has additional cost.)
          */
         wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
         wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
         w00 = lp_build_mul(coord_bld, wx0, wy0);
         w01 = lp_build_mul(coord_bld, s_fpart, wy0);
         w10 = lp_build_mul(coord_bld, wx0, t_fpart);
         w11 = lp_build_mul(coord_bld, s_fpart, t_fpart);

         /* find corner weight */
         c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]);
         c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero);
         c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]);
         c_weight = lp_build_select(coord_bld, c01, w01, c_weight);
         c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]);
         c_weight = lp_build_select(coord_bld, c10, w10, c_weight);
         c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]);
         c_weight = lp_build_select(coord_bld, c11, w11, c_weight);

         /*
          * add 1/3 of the corner weight to each of the 3 other samples
          * and null out corner weight
          */
         one_third = lp_build_const_vec(bld->gallivm, coord_bld->type, 1.0f/3.0f);
         c_weight = lp_build_mul(coord_bld, c_weight, one_third);
         w00 = lp_build_add(coord_bld, w00, c_weight);
         c00 = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, "");
         w00 = lp_build_andnot(coord_bld, w00, c00);
         w01 = lp_build_add(coord_bld, w01, c_weight);
         c01 = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, "");
         w01 = lp_build_andnot(coord_bld, w01, c01);
         w10 = lp_build_add(coord_bld, w10, c_weight);
         c10 = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, "");
         w10 = lp_build_andnot(coord_bld, w10, c10);
         w11 = lp_build_add(coord_bld, w11, c_weight);
         c11 = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, "");
         w11 = lp_build_andnot(coord_bld, w11, c11);
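         /*
          * E.g. if the corner sample is s00 with w00 = 0.6: the other three
          * weights each gain 0.2 and w00 is zeroed, so the weights still
          * sum to 1 and only valid texels contribute to the filtered result.
          */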
1442
1443 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1444 for (chan = 0; chan < 4; chan++) {
1445 colors0[chan] = lp_build_mul(coord_bld, w00, neighbors[0][0][chan]);
1446 tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]);
1447 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1448 tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]);
1449 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1450 tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]);
1451 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1452 }
1453 }
1454 else {
1455 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1456 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1457 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1458 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1459 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1460 /* inputs to interpolation are just masks so just add masked weights together */
1461 cmpval00 = LLVMBuildBitCast(builder, cmpval00, coord_bld->vec_type, "");
1462 cmpval01 = LLVMBuildBitCast(builder, cmpval01, coord_bld->vec_type, "");
1463 cmpval10 = LLVMBuildBitCast(builder, cmpval10, coord_bld->vec_type, "");
1464 cmpval11 = LLVMBuildBitCast(builder, cmpval11, coord_bld->vec_type, "");
1465 colors0[0] = lp_build_and(coord_bld, w00, cmpval00);
1466 tmp = lp_build_and(coord_bld, w01, cmpval01);
1467 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1468 tmp = lp_build_and(coord_bld, w10, cmpval10);
1469 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1470 tmp = lp_build_and(coord_bld, w11, cmpval11);
1471 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1472 colors0[1] = colors0[2] = colors0[3] = colors0[0];
1473 }
1474
1475 LLVMBuildStore(builder, colors0[0], colorss[0]);
1476 LLVMBuildStore(builder, colors0[1], colorss[1]);
1477 LLVMBuildStore(builder, colors0[2], colorss[2]);
1478 LLVMBuildStore(builder, colors0[3], colorss[3]);
1479
1480 lp_build_else(&corner_if);
1481 }
1482
1483 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1484 if (is_gather) {
1485 /*
1486 * Just assign the red channel (no component selection yet).
1487 * This is a bit hackish, we usually do the swizzle at the
1488 * end of sampling (much less values to swizzle), but this
1489 * obviously cannot work when using gather.
1490 */
1491 unsigned chan_swiz = bld->static_texture_state->swizzle_r;
1492 colors0[0] = lp_build_swizzle_soa_channel(texel_bld,
1493 neighbors[1][0],
1494 chan_swiz);
1495 colors0[1] = lp_build_swizzle_soa_channel(texel_bld,
1496 neighbors[1][1],
1497 chan_swiz);
1498 colors0[2] = lp_build_swizzle_soa_channel(texel_bld,
1499 neighbors[0][1],
1500 chan_swiz);
1501 colors0[3] = lp_build_swizzle_soa_channel(texel_bld,
1502 neighbors[0][0],
1503 chan_swiz);
1504 }
1505 else {
1506 /* Bilinear interpolate the four samples from the 2D image / 3D slice */
1507 for (chan = 0; chan < 4; chan++) {
1508 colors0[chan] = lp_build_lerp_2d(texel_bld,
1509 s_fpart, t_fpart,
1510 neighbors[0][0][chan],
1511 neighbors[0][1][chan],
1512 neighbors[1][0][chan],
1513 neighbors[1][1][chan],
1514 0);
1515 }
1516 }
1517 }
1518 else {
1519 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1520 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1521 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1522 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1523 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1524
1525 if (is_gather) {
1526 /* more hacks for swizzling, should be X, ONE or ZERO... */
1527 unsigned chan_swiz = bld->static_texture_state->swizzle_r;
1528 if (chan_swiz <= PIPE_SWIZZLE_W) {
1529 colors0[0] = lp_build_select(texel_bld, cmpval10,
1530 texel_bld->one, texel_bld->zero);
1531 colors0[1] = lp_build_select(texel_bld, cmpval11,
1532 texel_bld->one, texel_bld->zero);
1533 colors0[2] = lp_build_select(texel_bld, cmpval01,
1534 texel_bld->one, texel_bld->zero);
1535 colors0[3] = lp_build_select(texel_bld, cmpval00,
1536 texel_bld->one, texel_bld->zero);
1537 }
1538 else if (chan_swiz == PIPE_SWIZZLE_0) {
1539 colors0[0] = colors0[1] = colors0[2] = colors0[3] =
1540 texel_bld->zero;
1541 }
1542 else {
1543 colors0[0] = colors0[1] = colors0[2] = colors0[3] =
1544 texel_bld->one;
1545 }
1546 }
1547 else {
1548 colors0[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
1549 cmpval00, cmpval01, cmpval10, cmpval11);
1550 colors0[1] = colors0[2] = colors0[3] = colors0[0];
1551 }
1552 }
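   /*
    * Illustrative aside (not from the original code): the gather assignments
    * above return texels in the order neighbors[1][0], [1][1], [0][1], [0][0],
    * i.e. (i, j+1), (i+1, j+1), (i+1, j), (i, j), which appears to match the
    * x/y/z/w component order that GL's textureGather() / d3d10's gather4
    * specify.
    */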
1553
1554 if (accurate_cube_corners) {
1555 LLVMBuildStore(builder, colors0[0], colorss[0]);
1556 LLVMBuildStore(builder, colors0[1], colorss[1]);
1557 LLVMBuildStore(builder, colors0[2], colorss[2]);
1558 LLVMBuildStore(builder, colors0[3], colorss[3]);
1559
1560 lp_build_endif(&corner_if);
1561
1562 colors0[0] = LLVMBuildLoad(builder, colorss[0], "");
1563 colors0[1] = LLVMBuildLoad(builder, colorss[1], "");
1564 colors0[2] = LLVMBuildLoad(builder, colorss[2], "");
1565 colors0[3] = LLVMBuildLoad(builder, colorss[3], "");
1566 }
1567
1568 if (dims == 3) {
1569 LLVMValueRef neighbors1[2][2][4];
1570 LLVMValueRef colors1[4];
1571
1572 assert(!is_gather);
1573
1574 /* get x0/x1/y0/y1 texels at z1 */
1575 lp_build_sample_texel_soa(bld,
1576 width_vec, height_vec, depth_vec,
1577 x00, y00, z1,
1578 row_stride_vec, img_stride_vec,
1579 data_ptr, mipoffsets, neighbors1[0][0]);
1580 lp_build_sample_texel_soa(bld,
1581 width_vec, height_vec, depth_vec,
1582 x01, y01, z1,
1583 row_stride_vec, img_stride_vec,
1584 data_ptr, mipoffsets, neighbors1[0][1]);
1585 lp_build_sample_texel_soa(bld,
1586 width_vec, height_vec, depth_vec,
1587 x10, y10, z1,
1588 row_stride_vec, img_stride_vec,
1589 data_ptr, mipoffsets, neighbors1[1][0]);
1590 lp_build_sample_texel_soa(bld,
1591 width_vec, height_vec, depth_vec,
1592 x11, y11, z1,
1593 row_stride_vec, img_stride_vec,
1594 data_ptr, mipoffsets, neighbors1[1][1]);
1595
1596 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1597 /* Bilinear interpolate the four samples from the second Z slice */
1598 for (chan = 0; chan < 4; chan++) {
1599 colors1[chan] = lp_build_lerp_2d(texel_bld,
1600 s_fpart, t_fpart,
1601 neighbors1[0][0][chan],
1602 neighbors1[0][1][chan],
1603 neighbors1[1][0][chan],
1604 neighbors1[1][1][chan],
1605 0);
1606 }
1607 /* Linearly interpolate the two samples from the two 3D slices */
1608 for (chan = 0; chan < 4; chan++) {
1609 colors_out[chan] = lp_build_lerp(texel_bld,
1610 r_fpart,
1611 colors0[chan], colors1[chan],
1612 0);
1613 }
1614 }
1615 else {
1616 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1617          cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors1[0][0][0]);
1618          cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors1[0][1][0]);
1619          cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors1[1][0][0]);
1620          cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors1[1][1][0]);
1621 colors1[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
1622 cmpval00, cmpval01, cmpval10, cmpval11);
1623 /* Linearly interpolate the two samples from the two 3D slices */
1624 colors_out[0] = lp_build_lerp(texel_bld,
1625 r_fpart,
1626 colors0[0], colors1[0],
1627 0);
1628 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1629 }
1630 }
1631 else {
1632 /* 2D tex */
1633 for (chan = 0; chan < 4; chan++) {
1634 colors_out[chan] = colors0[chan];
1635 }
1636 }
1637 }
1638 }
1639
1640
1641 /**
1642 * Sample the texture/mipmap using given image filter and mip filter.
1643 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1644 * from (vectors or scalars).
1645  * If we're using nearest miplevel sampling, the '1' values will be null/unused.
1646 */
1647 static void
1648 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1649 unsigned img_filter,
1650 unsigned mip_filter,
1651 boolean is_gather,
1652 LLVMValueRef *coords,
1653 const LLVMValueRef *offsets,
1654 LLVMValueRef ilevel0,
1655 LLVMValueRef ilevel1,
1656 LLVMValueRef lod_fpart,
1657 LLVMValueRef *colors_out)
1658 {
1659 LLVMBuilderRef builder = bld->gallivm->builder;
1660 LLVMValueRef size0 = NULL;
1661 LLVMValueRef size1 = NULL;
1662 LLVMValueRef row_stride0_vec = NULL;
1663 LLVMValueRef row_stride1_vec = NULL;
1664 LLVMValueRef img_stride0_vec = NULL;
1665 LLVMValueRef img_stride1_vec = NULL;
1666 LLVMValueRef data_ptr0 = NULL;
1667 LLVMValueRef data_ptr1 = NULL;
1668 LLVMValueRef mipoff0 = NULL;
1669 LLVMValueRef mipoff1 = NULL;
1670 LLVMValueRef colors0[4], colors1[4];
1671 unsigned chan;
1672
1673 /* sample the first mipmap level */
1674 lp_build_mipmap_level_sizes(bld, ilevel0,
1675 &size0,
1676 &row_stride0_vec, &img_stride0_vec);
1677 if (bld->num_mips == 1) {
1678 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1679 }
1680 else {
1681       /* This path should work for num_lods == 1 too, but is slightly less efficient */
1682 data_ptr0 = bld->base_ptr;
1683 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1684 }
1685 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1686 lp_build_sample_image_nearest(bld, size0,
1687 row_stride0_vec, img_stride0_vec,
1688 data_ptr0, mipoff0, coords, offsets,
1689 colors0);
1690 }
1691 else {
1692 assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1693 lp_build_sample_image_linear(bld, is_gather, size0, NULL,
1694 row_stride0_vec, img_stride0_vec,
1695 data_ptr0, mipoff0, coords, offsets,
1696 colors0);
1697 }
1698
1699 /* Store the first level's colors in the output variables */
1700 for (chan = 0; chan < 4; chan++) {
1701 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1702 }
1703
1704 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1705 struct lp_build_if_state if_ctx;
1706 LLVMValueRef need_lerp;
1707
1708 /* need_lerp = lod_fpart > 0 */
1709 if (bld->num_lods == 1) {
1710 need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
1711 lod_fpart, bld->lodf_bld.zero,
1712 "need_lerp");
1713 }
1714 else {
1715 /*
1716           * We'll do mip filtering if any of the quads (or individual
1717           * pixels, in case of per-pixel lod) need it.
1718 * It might be better to split the vectors here and only fetch/filter
1719 * quads which need it (if there's one lod per quad).
1720 */
1721 need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
1722 PIPE_FUNC_GREATER,
1723 lod_fpart, bld->lodf_bld.zero);
1724 need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp);
1725 }
1726
1727 lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1728 {
1729 /*
1730 * We unfortunately need to clamp lod_fpart here since we can get
1731           * negative values, which would screw up filtering if not all
1732           * lod_fpart values have the same sign.
1733 */
1734 lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
1735 bld->lodf_bld.zero);
1736 /* sample the second mipmap level */
1737 lp_build_mipmap_level_sizes(bld, ilevel1,
1738 &size1,
1739 &row_stride1_vec, &img_stride1_vec);
1740 if (bld->num_mips == 1) {
1741 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1742 }
1743 else {
1744 data_ptr1 = bld->base_ptr;
1745 mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1746 }
1747 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1748 lp_build_sample_image_nearest(bld, size1,
1749 row_stride1_vec, img_stride1_vec,
1750 data_ptr1, mipoff1, coords, offsets,
1751 colors1);
1752 }
1753 else {
1754 lp_build_sample_image_linear(bld, FALSE, size1, NULL,
1755 row_stride1_vec, img_stride1_vec,
1756 data_ptr1, mipoff1, coords, offsets,
1757 colors1);
1758 }
1759
1760 /* interpolate samples from the two mipmap levels */
1761
1762 if (bld->num_lods != bld->coord_type.length)
1763 lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1764 bld->lodf_bld.type,
1765 bld->texel_bld.type,
1766 lod_fpart);
1767
1768 for (chan = 0; chan < 4; chan++) {
1769 colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1770 colors0[chan], colors1[chan],
1771 0);
1772 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1773 }
1774 }
1775 lp_build_endif(&if_ctx);
1776 }
1777 }
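
#if 0
/*
 * Purely illustrative scalar sketch of the per-channel math generated by
 * lp_build_sample_mipmap() above for PIPE_TEX_MIPFILTER_LINEAR; the helper
 * and its names are hypothetical, not part of gallivm.
 */
static float
example_mip_linear(float color0, float color1, float lod_fpart)
{
   /*
    * The vector code takes the lerp branch if *any* lane has lod_fpart > 0,
    * so individual lanes may still carry negative values and get clamped.
    */
   if (lod_fpart <= 0.0f)
      return color0;
   return color0 + lod_fpart * (color1 - color0);
}
#endif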
1778
1779
1780 /**
1781 * Sample the texture/mipmap using given mip filter, and using
1782 * both nearest and linear filtering at the same time depending
1783 * on linear_mask.
1784 * lod can be per quad but linear_mask is always per pixel.
1785 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1786 * from (vectors or scalars).
1787  * If we're using nearest miplevel sampling, the '1' values will be null/unused.
1788 */
1789 static void
1790 lp_build_sample_mipmap_both(struct lp_build_sample_context *bld,
1791 LLVMValueRef linear_mask,
1792 unsigned mip_filter,
1793 LLVMValueRef *coords,
1794 const LLVMValueRef *offsets,
1795 LLVMValueRef ilevel0,
1796 LLVMValueRef ilevel1,
1797 LLVMValueRef lod_fpart,
1798 LLVMValueRef lod_positive,
1799 LLVMValueRef *colors_out)
1800 {
1801 LLVMBuilderRef builder = bld->gallivm->builder;
1802 LLVMValueRef size0 = NULL;
1803 LLVMValueRef size1 = NULL;
1804 LLVMValueRef row_stride0_vec = NULL;
1805 LLVMValueRef row_stride1_vec = NULL;
1806 LLVMValueRef img_stride0_vec = NULL;
1807 LLVMValueRef img_stride1_vec = NULL;
1808 LLVMValueRef data_ptr0 = NULL;
1809 LLVMValueRef data_ptr1 = NULL;
1810 LLVMValueRef mipoff0 = NULL;
1811 LLVMValueRef mipoff1 = NULL;
1812 LLVMValueRef colors0[4], colors1[4];
1813 unsigned chan;
1814
1815 /* sample the first mipmap level */
1816 lp_build_mipmap_level_sizes(bld, ilevel0,
1817 &size0,
1818 &row_stride0_vec, &img_stride0_vec);
1819 if (bld->num_mips == 1) {
1820 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1821 }
1822 else {
1823       /* This path should work for num_lods == 1 too, but is slightly less efficient */
1824 data_ptr0 = bld->base_ptr;
1825 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1826 }
1827
1828 lp_build_sample_image_linear(bld, FALSE, size0, linear_mask,
1829 row_stride0_vec, img_stride0_vec,
1830 data_ptr0, mipoff0, coords, offsets,
1831 colors0);
1832
1833 /* Store the first level's colors in the output variables */
1834 for (chan = 0; chan < 4; chan++) {
1835 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1836 }
1837
1838 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1839 struct lp_build_if_state if_ctx;
1840 LLVMValueRef need_lerp;
1841
1842 /*
1843 * We'll do mip filtering if any of the quads (or individual
1844        * pixels, in case of per-pixel lod) need it.
1845        * Note we use lod_positive here, not lod_fpart, since it may be the
1846        * same condition as the one used in the outer "if" in the caller;
1847        * llvm should then be able to merge the branches in this case.
1848 */
1849 need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_positive);
1850
1851 lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1852 {
1853 /*
1854 * We unfortunately need to clamp lod_fpart here since we can get
1855           * negative values, which would screw up filtering if not all
1856           * lod_fpart values have the same sign.
1857 */
1858 lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
1859 bld->lodf_bld.zero);
1860 /* sample the second mipmap level */
1861 lp_build_mipmap_level_sizes(bld, ilevel1,
1862 &size1,
1863 &row_stride1_vec, &img_stride1_vec);
1864 if (bld->num_mips == 1) {
1865 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1866 }
1867 else {
1868 data_ptr1 = bld->base_ptr;
1869 mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1870 }
1871
1872 lp_build_sample_image_linear(bld, FALSE, size1, linear_mask,
1873 row_stride1_vec, img_stride1_vec,
1874 data_ptr1, mipoff1, coords, offsets,
1875 colors1);
1876
1877 /* interpolate samples from the two mipmap levels */
1878
1879 if (bld->num_lods != bld->coord_type.length)
1880 lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1881 bld->lodf_bld.type,
1882 bld->texel_bld.type,
1883 lod_fpart);
1884
1885 for (chan = 0; chan < 4; chan++) {
1886 colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1887 colors0[chan], colors1[chan],
1888 0);
1889 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1890 }
1891 }
1892 lp_build_endif(&if_ctx);
1893 }
1894 }
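
/*
 * Illustrative note (an interpretation, not from the original code): in the
 * combined path above, lanes whose linear_mask bit is set get genuine
 * bilinear weights, while the remaining lanes have their weights collapsed
 * to a 0/1 selection so the shared linear code degenerates to nearest
 * filtering for them; these are the "tricks (with weights)" the callers
 * rely on.
 */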
1895
1896
1897 /**
1898 * Build (per-coord) layer value.
1899  * Either clamp the layer to valid values, or fill in the optional
1900  * out_of_bounds value and return the layer unclamped.
1901 */
1902 static LLVMValueRef
1903 lp_build_layer_coord(struct lp_build_sample_context *bld,
1904 unsigned texture_unit,
1905 boolean is_cube_array,
1906 LLVMValueRef layer,
1907 LLVMValueRef *out_of_bounds)
1908 {
1909 LLVMValueRef num_layers;
1910 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
1911
1912 num_layers = bld->dynamic_state->depth(bld->dynamic_state, bld->gallivm,
1913 bld->context_ptr, texture_unit);
1914
1915 if (out_of_bounds) {
1916 LLVMValueRef out1, out;
1917 assert(!is_cube_array);
1918 num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers);
1919 out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero);
1920 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers);
1921 *out_of_bounds = lp_build_or(int_coord_bld, out, out1);
1922 return layer;
1923 }
1924 else {
1925 LLVMValueRef maxlayer;
1926 LLVMValueRef s = is_cube_array ? lp_build_const_int32(bld->gallivm, 6) :
1927 bld->int_bld.one;
1928 maxlayer = lp_build_sub(&bld->int_bld, num_layers, s);
1929 maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer);
1930 return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer);
1931 }
1932 }
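
#if 0
/*
 * Illustrative scalar equivalent of lp_build_layer_coord() above
 * (hypothetical sketch, not part of gallivm):
 */
static int
example_layer_coord(int layer, int num_layers, boolean is_cube_array,
                    boolean *out_of_bounds)
{
   if (out_of_bounds) {
      *out_of_bounds = layer < 0 || layer >= num_layers;
      return layer;   /* returned unclamped */
   }
   else {
      /* clamp to the first layer of the last valid (cube) slot */
      int maxlayer = num_layers - (is_cube_array ? 6 : 1);
      return CLAMP(layer, 0, maxlayer);
   }
}
#endif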
1933
1934
1935 /**
1936 * Calculate cube face, lod, mip levels.
1937 */
1938 static void
1939 lp_build_sample_common(struct lp_build_sample_context *bld,
1940 boolean is_lodq,
1941 unsigned texture_index,
1942 unsigned sampler_index,
1943 LLVMValueRef *coords,
1944 const struct lp_derivatives *derivs, /* optional */
1945 LLVMValueRef lod_bias, /* optional */
1946 LLVMValueRef explicit_lod, /* optional */
1947 LLVMValueRef *lod_pos_or_zero,
1948 LLVMValueRef *lod,
1949 LLVMValueRef *lod_fpart,
1950 LLVMValueRef *ilevel0,
1951 LLVMValueRef *ilevel1)
1952 {
1953 const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
1954 const unsigned min_filter = bld->static_sampler_state->min_img_filter;
1955 const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
1956 const unsigned target = bld->static_texture_state->target;
1957 LLVMValueRef first_level, cube_rho = NULL;
1958 LLVMValueRef lod_ipart = NULL;
1959 struct lp_derivatives cube_derivs;
1960
1961 /*
1962 printf("%s mip %d min %d mag %d\n", __FUNCTION__,
1963 mip_filter, min_filter, mag_filter);
1964 */
1965
1966 /*
1967 * Choose cube face, recompute texcoords for the chosen face and
1968 * compute rho here too (as it requires transform of derivatives).
1969 */
1970 if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY) {
1971 boolean need_derivs;
1972 need_derivs = ((min_filter != mag_filter ||
1973 mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
1974 !bld->static_sampler_state->min_max_lod_equal &&
1975 !explicit_lod);
1976 lp_build_cube_lookup(bld, coords, derivs, &cube_rho, &cube_derivs, need_derivs);
1977 derivs = &cube_derivs;
1978 if (target == PIPE_TEXTURE_CUBE_ARRAY) {
1979 /* calculate cube layer coord now */
1980 LLVMValueRef layer = lp_build_iround(&bld->coord_bld, coords[3]);
1981 LLVMValueRef six = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 6);
1982 layer = lp_build_mul(&bld->int_coord_bld, layer, six);
1983 coords[3] = lp_build_layer_coord(bld, texture_index, TRUE, layer, NULL);
1984          /* because of seamless filtering we can't add it to the face (coords[2]) here. */
1985 }
1986 }
1987 else if (target == PIPE_TEXTURE_1D_ARRAY ||
1988 target == PIPE_TEXTURE_2D_ARRAY) {
1989 coords[2] = lp_build_iround(&bld->coord_bld, coords[2]);
1990 coords[2] = lp_build_layer_coord(bld, texture_index, FALSE, coords[2], NULL);
1991 }
1992
1993 if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
1994 /*
1995        * Clamp p coords to [0,1] for fixed-function depth texture formats here.
1996        * Technically this is not entirely correct for unorm depth, as the ref
1997        * value should be converted to the depth format (quantization!) and the
1998        * comparison then done in the texture format. That would actually help
1999        * performance (we'd only need to do it once and could skip the per-sample
2000        * conversion of texels to floats), but it would need messier code (we'd
2001        * need to push at least some bits down to the actual fetch so conversion
2002        * could be skipped, and it would interact badly with border color: we'd
2003        * need to convert border color to that format too, or use other tricks).
2004 */
2005 const struct util_format_description *format_desc = bld->format_desc;
2006 unsigned chan_type;
2007       /* not entirely sure we couldn't end up with an invalid swizzle here */
2008 chan_type = format_desc->swizzle[0] <= PIPE_SWIZZLE_W ?
2009 format_desc->channel[format_desc->swizzle[0]].type :
2010 UTIL_FORMAT_TYPE_FLOAT;
2011 if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
2012 coords[4] = lp_build_clamp(&bld->coord_bld, coords[4],
2013 bld->coord_bld.zero, bld->coord_bld.one);
2014 }
2015 }
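
   /*
    * (Illustrative example: with PIPE_FORMAT_Z16_UNORM the texels can only
    * represent [0,1], so clamping the ref value above mirrors what a
    * quantized comparison would yield for out-of-range refs.)
    */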
2016
2017 /*
2018 * Compute the level of detail (float).
2019 */
2020 if (min_filter != mag_filter ||
2021 mip_filter != PIPE_TEX_MIPFILTER_NONE || is_lodq) {
2022 /* Need to compute lod either to choose mipmap levels or to
2023 * distinguish between minification/magnification with one mipmap level.
2024 */
2025 lp_build_lod_selector(bld, is_lodq, texture_index, sampler_index,
2026 coords[0], coords[1], coords[2], cube_rho,
2027 derivs, lod_bias, explicit_lod,
2028 mip_filter, lod,
2029 &lod_ipart, lod_fpart, lod_pos_or_zero);
2030 if (is_lodq) {
2031 LLVMValueRef last_level;
2032 last_level = bld->dynamic_state->last_level(bld->dynamic_state,
2033 bld->gallivm,
2034 bld->context_ptr,
2035 texture_index);
2036 first_level = bld->dynamic_state->first_level(bld->dynamic_state,
2037 bld->gallivm,
2038 bld->context_ptr,
2039 texture_index);
2040 last_level = lp_build_sub(&bld->int_bld, last_level, first_level);
2041 last_level = lp_build_int_to_float(&bld->float_bld, last_level);
2042 last_level = lp_build_broadcast_scalar(&bld->lodf_bld, last_level);
2043
2044 switch (mip_filter) {
2045 case PIPE_TEX_MIPFILTER_NONE:
2046 *lod_fpart = bld->lodf_bld.zero;
2047 break;
2048 case PIPE_TEX_MIPFILTER_NEAREST:
2049 *lod_fpart = lp_build_round(&bld->lodf_bld, *lod_fpart);
2050 /* fallthrough */
2051 case PIPE_TEX_MIPFILTER_LINEAR:
2052 *lod_fpart = lp_build_clamp(&bld->lodf_bld, *lod_fpart,
2053 bld->lodf_bld.zero, last_level);
2054 break;
2055 }
2056 return;
2057 }
2058
2059 } else {
2060 lod_ipart = bld->lodi_bld.zero;
2061 *lod_pos_or_zero = bld->lodi_bld.zero;
2062 }
2063
2064 if (bld->num_lods != bld->num_mips) {
2065 /* only makes sense if there's just a single mip level */
2066 assert(bld->num_mips == 1);
2067 lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
2068 }
2069
2070 /*
2071 * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
2072 */
2073 switch (mip_filter) {
2074 default:
2075 assert(0 && "bad mip_filter value in lp_build_sample_soa()");
2076 /* fall-through */
2077 case PIPE_TEX_MIPFILTER_NONE:
2078 /* always use mip level 0 */
2079 first_level = bld->dynamic_state->first_level(bld->dynamic_state,
2080 bld->gallivm, bld->context_ptr,
2081 texture_index);
2082 first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
2083 *ilevel0 = first_level;
2084 break;
2085 case PIPE_TEX_MIPFILTER_NEAREST:
2086 assert(lod_ipart);
2087 lp_build_nearest_mip_level(bld, texture_index, lod_ipart, ilevel0, NULL);
2088 break;
2089 case PIPE_TEX_MIPFILTER_LINEAR:
2090 assert(lod_ipart);
2091 assert(*lod_fpart);
2092 lp_build_linear_mip_levels(bld, texture_index,
2093 lod_ipart, lod_fpart,
2094 ilevel0, ilevel1);
2095 break;
2096 }
2097 }
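
#if 0
/*
 * Illustrative scalar sketch of the ilevel0/ilevel1 selection above
 * (hypothetical helper; the real code also clamps to the level range
 * and supports the brilinear optimization):
 */
static void
example_mip_levels(unsigned mip_filter, float lod, int first_level,
                   int *ilevel0, int *ilevel1, float *lod_fpart)
{
   switch (mip_filter) {
   case PIPE_TEX_MIPFILTER_NONE:
      *ilevel0 = first_level;
      break;
   case PIPE_TEX_MIPFILTER_NEAREST:
      *ilevel0 = first_level + (int)floorf(lod + 0.5f);
      break;
   case PIPE_TEX_MIPFILTER_LINEAR:
      *ilevel0 = first_level + (int)floorf(lod);
      *ilevel1 = *ilevel0 + 1;
      *lod_fpart = lod - floorf(lod);
      break;
   }
}
#endif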
2098
2099 static void
2100 lp_build_clamp_border_color(struct lp_build_sample_context *bld,
2101 unsigned sampler_unit)
2102 {
2103 struct gallivm_state *gallivm = bld->gallivm;
2104 LLVMBuilderRef builder = gallivm->builder;
2105 LLVMValueRef border_color_ptr =
2106 bld->dynamic_state->border_color(bld->dynamic_state, gallivm,
2107 bld->context_ptr, sampler_unit);
2108 LLVMValueRef border_color;
2109 const struct util_format_description *format_desc = bld->format_desc;
2110 struct lp_type vec4_type = bld->texel_type;
2111 struct lp_build_context vec4_bld;
2112 LLVMValueRef min_clamp = NULL;
2113 LLVMValueRef max_clamp = NULL;
2114
2115 /*
2116     * For normalized formats we need to clamp the border color (technically
2117     * we probably should also quantize the data). Really sucks doing this
2118     * here, but it can't be avoided, at least for now, since this is part of
2119     * sampler state while the texture format is part of sampler_view state.
2120     * GL also expects clamping for uint/sint formats, so do that as well
2121     * (d3d10 can't end up here with uint/sint since it only supports them
2122     * with ld).
2123 */
2124 vec4_type.length = 4;
2125 lp_build_context_init(&vec4_bld, gallivm, vec4_type);
2126
2127 /*
2128 * Vectorized clamping of border color. Loading is a bit of a hack since
2129 * we just cast the pointer to float array to pointer to vec4
2130 * (int or float).
2131 */
2132 border_color_ptr = lp_build_array_get_ptr(gallivm, border_color_ptr,
2133 lp_build_const_int32(gallivm, 0));
2134 border_color_ptr = LLVMBuildBitCast(builder, border_color_ptr,
2135 LLVMPointerType(vec4_bld.vec_type, 0), "");
2136 border_color = LLVMBuildLoad(builder, border_color_ptr, "");
2137 /* we don't have aligned type in the dynamic state unfortunately */
2138 LLVMSetAlignment(border_color, 4);
2139
2140 /*
2141 * Instead of having some incredibly complex logic which will try to figure out
2142 * clamping necessary for each channel, simply use the first channel, and treat
2143 * mixed signed/unsigned normalized formats specially.
2144 * (Mixed non-normalized, which wouldn't work at all here, do not exist for a
2145 * good reason.)
2146 */
2147 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
2148 int chan;
2149 /* d/s needs special handling because both present means just sampling depth */
2150 if (util_format_is_depth_and_stencil(format_desc->format)) {
2151 chan = format_desc->swizzle[0];
2152 }
2153 else {
2154 chan = util_format_get_first_non_void_channel(format_desc->format);
2155 }
2156 if (chan >= 0 && chan <= PIPE_SWIZZLE_W) {
2157 unsigned chan_type = format_desc->channel[chan].type;
2158 unsigned chan_norm = format_desc->channel[chan].normalized;
2159 unsigned chan_pure = format_desc->channel[chan].pure_integer;
2160 if (chan_type == UTIL_FORMAT_TYPE_SIGNED) {
2161 if (chan_norm) {
2162 min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
2163 max_clamp = vec4_bld.one;
2164 }
2165 else if (chan_pure) {
2166 /*
2167                 * Border color was stored as int, hence we need min/max clamps
2168                 * only if the channel has fewer than 32 bits.
2169 */
2170 unsigned chan_size = format_desc->channel[chan].size;
2171 if (chan_size < 32) {
2172 min_clamp = lp_build_const_int_vec(gallivm, vec4_type,
2173 0 - (1 << (chan_size - 1)));
2174 max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
2175 (1 << (chan_size - 1)) - 1);
2176 }
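                /* (e.g. a pure S16 channel gets clamped to [-32768, 32767]) */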
2177 }
2178 /* TODO: no idea about non-pure, non-normalized! */
2179 }
2180 else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) {
2181 if (chan_norm) {
2182 min_clamp = vec4_bld.zero;
2183 max_clamp = vec4_bld.one;
2184 }
2185 /*
2186              * Need an ugly hack here: because we don't have Z32_FLOAT_X8X24,
2187              * we use Z32_FLOAT_S8X24 to imply sampling the depth component
2188              * and ignoring stencil, which would blow up here if we tried to
2189              * do a uint clamp in a float texel build...
2190              * And even if we had that format, mesa st also thinks using z24s8
2191              * means depth sampling ignoring stencil.
2192 */
2193 else if (chan_pure) {
2194 /*
2195                 * Border color was stored as uint, hence we never need a min
2196                 * clamp, and only need a max clamp if the channel has fewer than 32 bits.
2197 */
2198 unsigned chan_size = format_desc->channel[chan].size;
2199 if (chan_size < 32) {
2200 max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
2201 (1 << chan_size) - 1);
2202 }
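                /* (e.g. a pure U8 channel gets a max clamp of 255; no min clamp needed) */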
2203 /* TODO: no idea about non-pure, non-normalized! */
2204 }
2205 }
2206 else if (chan_type == UTIL_FORMAT_TYPE_FIXED) {
2207 /* TODO: I have no idea what clamp this would need if any! */
2208 }
2209 }
2210 /* mixed plain formats (or different pure size) */
2211 switch (format_desc->format) {
2212 case PIPE_FORMAT_B10G10R10A2_UINT:
2213 case PIPE_FORMAT_R10G10B10A2_UINT:
2214 {
2215 unsigned max10 = (1 << 10) - 1;
2216 max_clamp = lp_build_const_aos(gallivm, vec4_type, max10, max10,
2217 max10, (1 << 2) - 1, NULL);
2218 }
2219 break;
2220 case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
2221 min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
2222 -1.0F, 0.0F, NULL);
2223 max_clamp = vec4_bld.one;
2224 break;
2225 case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
2226 case PIPE_FORMAT_R5SG5SB6U_NORM:
2227 min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
2228 0.0F, 0.0F, NULL);
2229 max_clamp = vec4_bld.one;
2230 break;
2231 default:
2232 break;
2233 }
2234 }
2235 else {
2236 /* cannot figure this out from format description */
2237 if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
2238 /* s3tc formats are always unorm */
2239 min_clamp = vec4_bld.zero;
2240 max_clamp = vec4_bld.one;
2241 }
2242 else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC ||
2243 format_desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
2244 switch (format_desc->format) {
2245 case PIPE_FORMAT_RGTC1_UNORM:
2246 case PIPE_FORMAT_RGTC2_UNORM:
2247 case PIPE_FORMAT_LATC1_UNORM:
2248 case PIPE_FORMAT_LATC2_UNORM:
2249 case PIPE_FORMAT_ETC1_RGB8:
2250 min_clamp = vec4_bld.zero;
2251 max_clamp = vec4_bld.one;
2252 break;
2253 case PIPE_FORMAT_RGTC1_SNORM:
2254 case PIPE_FORMAT_RGTC2_SNORM:
2255 case PIPE_FORMAT_LATC1_SNORM:
2256 case PIPE_FORMAT_LATC2_SNORM:
2257 min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
2258 max_clamp = vec4_bld.one;
2259 break;
2260 default:
2261 assert(0);
2262 break;
2263 }
2264 }
2265 /*
2266 * all others from subsampled/other group, though we don't care
2267 * about yuv (and should not have any from zs here)
2268 */
2269 else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){
2270 switch (format_desc->format) {
2271 case PIPE_FORMAT_R8G8_B8G8_UNORM:
2272 case PIPE_FORMAT_G8R8_G8B8_UNORM:
2273 case PIPE_FORMAT_G8R8_B8R8_UNORM:
2274 case PIPE_FORMAT_R8G8_R8B8_UNORM:
2275 case PIPE_FORMAT_R1_UNORM: /* doesn't make sense but ah well */
2276 min_clamp = vec4_bld.zero;
2277 max_clamp = vec4_bld.one;
2278 break;
2279 case PIPE_FORMAT_R8G8Bx_SNORM:
2280 min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
2281 max_clamp = vec4_bld.one;
2282 break;
2283 /*
2284 * Note smallfloat formats usually don't need clamping
2285 * (they still have infinite range) however this is not
2286 * true for r11g11b10 and r9g9b9e5, which can't represent
2287 * negative numbers (and additionally r9g9b9e5 can't represent
2288 * very large numbers). d3d10 seems happy without clamping in
2289 * this case, but gl spec is pretty clear: "for floating
2290 * point and integer formats, border values are clamped to
2291 * the representable range of the format" so do that here.
2292 */
2293 case PIPE_FORMAT_R11G11B10_FLOAT:
2294 min_clamp = vec4_bld.zero;
2295 break;
2296 case PIPE_FORMAT_R9G9B9E5_FLOAT:
2297 min_clamp = vec4_bld.zero;
2298 max_clamp = lp_build_const_vec(gallivm, vec4_type, MAX_RGB9E5);
2299 break;
2300 default:
2301 assert(0);
2302 break;
2303 }
2304 }
2305 }
2306
2307 if (min_clamp) {
2308 border_color = lp_build_max(&vec4_bld, border_color, min_clamp);
2309 }
2310 if (max_clamp) {
2311 border_color = lp_build_min(&vec4_bld, border_color, max_clamp);
2312 }
2313
2314 bld->border_color_clamped = border_color;
2315 }
2316
2317
2318 /**
2319 * General texture sampling codegen.
2320 * This function handles texture sampling for all texture targets (1D,
2321 * 2D, 3D, cube) and all filtering modes.
2322 */
2323 static void
2324 lp_build_sample_general(struct lp_build_sample_context *bld,
2325 unsigned sampler_unit,
2326 boolean is_gather,
2327 LLVMValueRef *coords,
2328 const LLVMValueRef *offsets,
2329 LLVMValueRef lod_positive,
2330 LLVMValueRef lod_fpart,
2331 LLVMValueRef ilevel0,
2332 LLVMValueRef ilevel1,
2333 LLVMValueRef *colors_out)
2334 {
2335 LLVMBuilderRef builder = bld->gallivm->builder;
2336 const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state;
2337 const unsigned mip_filter = sampler_state->min_mip_filter;
2338 const unsigned min_filter = sampler_state->min_img_filter;
2339 const unsigned mag_filter = sampler_state->mag_img_filter;
2340 LLVMValueRef texels[4];
2341 unsigned chan;
2342
2343 /* if we need border color, (potentially) clamp it now */
2344 if (lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_s,
2345 min_filter,
2346 mag_filter) ||
2347 (bld->dims > 1 &&
2348 lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_t,
2349 min_filter,
2350 mag_filter)) ||
2351 (bld->dims > 2 &&
2352 lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_r,
2353 min_filter,
2354 mag_filter))) {
2355 lp_build_clamp_border_color(bld, sampler_unit);
2356 }
2357
2358
2359 /*
2360 * Get/interpolate texture colors.
2361 */
2362
2363 for (chan = 0; chan < 4; ++chan) {
2364 texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, "");
2365 lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]);
2366 }
2367
2368 if (min_filter == mag_filter) {
2369 /* no need to distinguish between minification and magnification */
2370 lp_build_sample_mipmap(bld, min_filter, mip_filter,
2371 is_gather,
2372 coords, offsets,
2373 ilevel0, ilevel1, lod_fpart,
2374 texels);
2375 }
2376 else {
2377 /*
2378 * Could also get rid of the if-logic and always use mipmap_both, both
2379 * for the single lod and multi-lod case if nothing really uses this.
2380 */
2381 if (bld->num_lods == 1) {
2382 /* Emit conditional to choose min image filter or mag image filter
2383 * depending on the lod being > 0 or <= 0, respectively.
2384 */
2385 struct lp_build_if_state if_ctx;
2386
2387 lod_positive = LLVMBuildTrunc(builder, lod_positive,
2388 LLVMInt1TypeInContext(bld->gallivm->context), "");
2389
2390 lp_build_if(&if_ctx, bld->gallivm, lod_positive);
2391 {
2392 /* Use the minification filter */
2393 lp_build_sample_mipmap(bld, min_filter, mip_filter, FALSE,
2394 coords, offsets,
2395 ilevel0, ilevel1, lod_fpart,
2396 texels);
2397 }
2398 lp_build_else(&if_ctx);
2399 {
2400 /* Use the magnification filter */
2401 lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE,
2402 FALSE,
2403 coords, offsets,
2404 ilevel0, NULL, NULL,
2405 texels);
2406 }
2407 lp_build_endif(&if_ctx);
2408 }
2409 else {
2410 LLVMValueRef need_linear, linear_mask;
2411 unsigned mip_filter_for_nearest;
2412 struct lp_build_if_state if_ctx;
2413
2414 if (min_filter == PIPE_TEX_FILTER_LINEAR) {
2415 linear_mask = lod_positive;
2416 mip_filter_for_nearest = PIPE_TEX_MIPFILTER_NONE;
2417 }
2418 else {
2419 linear_mask = lp_build_not(&bld->lodi_bld, lod_positive);
2420 mip_filter_for_nearest = mip_filter;
2421 }
2422 need_linear = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
2423 linear_mask);
2424
2425 if (bld->num_lods != bld->coord_type.length) {
2426 linear_mask = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
2427 bld->lodi_type,
2428 bld->int_coord_type,
2429 linear_mask);
2430 }
2431
2432 lp_build_if(&if_ctx, bld->gallivm, need_linear);
2433 {
2434 /*
2435 * Do sampling with both filters simultaneously. This means using
2436 * a linear filter and doing some tricks (with weights) for the pixels
2437 * which need nearest filter.
2438            * Note that it's probably rare that some pixels need nearest and
2439            * some linear filtering, but the fixups required for the nearest
2440            * pixels aren't all that complicated, so just always run the
2441            * combined path if at least some pixels require linear.
2442 */
2443 lp_build_sample_mipmap_both(bld, linear_mask, mip_filter,
2444 coords, offsets,
2445 ilevel0, ilevel1,
2446 lod_fpart, lod_positive,
2447 texels);
2448 }
2449 lp_build_else(&if_ctx);
2450 {
2451 /*
2452 * All pixels require just nearest filtering, which is way
2453 * cheaper than linear, hence do a separate path for that.
2454 */
2455 lp_build_sample_mipmap(bld, PIPE_TEX_FILTER_NEAREST,
2456 mip_filter_for_nearest, FALSE,
2457 coords, offsets,
2458 ilevel0, ilevel1, lod_fpart,
2459 texels);
2460 }
2461 lp_build_endif(&if_ctx);
2462 }
2463 }
2464
2465 for (chan = 0; chan < 4; ++chan) {
2466 colors_out[chan] = LLVMBuildLoad(builder, texels[chan], "");
2467 lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]);
2468 }
2469 }
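
/*
 * Illustrative decision logic of lp_build_sample_general() above, in scalar
 * form (a sketch only, not part of gallivm):
 *
 *    filter = (lod > 0) ? min_filter : mag_filter;
 *    color  = sample_mipmap(filter,
 *                           (lod > 0) ? mip_filter : PIPE_TEX_MIPFILTER_NONE);
 *
 * With per-lane lods both filters may be needed within one vector, hence
 * the combined lp_build_sample_mipmap_both() path.
 */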
2470
2471
2472 /**
2473 * Texel fetch function.
2474  * In contrast to general sampling there is no filtering and no coord
2475  * minification: lod (if any) is always an explicit uint, and coords are uints
2476  * (in texel units) applied directly to the selected mip level (after adding texel offsets).
2477 * This function handles texel fetch for all targets where texel fetch is supported
2478 * (no cube maps, but 1d, 2d, 3d are supported, arrays and buffers should be too).
2479 */
2480 static void
2481 lp_build_fetch_texel(struct lp_build_sample_context *bld,
2482 unsigned texture_unit,
2483 const LLVMValueRef *coords,
2484 LLVMValueRef explicit_lod,
2485 const LLVMValueRef *offsets,
2486 LLVMValueRef *colors_out)
2487 {
2488 struct lp_build_context *perquadi_bld = &bld->lodi_bld;
2489 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
2490 unsigned dims = bld->dims, chan;
2491 unsigned target = bld->static_texture_state->target;
2492 boolean out_of_bound_ret_zero = TRUE;
2493 LLVMValueRef size, ilevel;
2494 LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
2495 LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
2496 LLVMValueRef width, height, depth, i, j;
2497 LLVMValueRef offset, out_of_bounds, out1;
2498
2499 out_of_bounds = int_coord_bld->zero;
2500
2501 if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
2502 if (bld->num_mips != int_coord_bld->type.length) {
2503 ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
2504 perquadi_bld->type, explicit_lod, 0);
2505 }
2506 else {
2507 ilevel = explicit_lod;
2508 }
2509 lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel,
2510 out_of_bound_ret_zero ? &out_of_bounds : NULL);
2511 }
2512 else {
2513 assert(bld->num_mips == 1);
2514 if (bld->static_texture_state->target != PIPE_BUFFER) {
2515 ilevel = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm,
2516 bld->context_ptr, texture_unit);
2517 }
2518 else {
2519 ilevel = lp_build_const_int32(bld->gallivm, 0);
2520 }
2521 }
2522 lp_build_mipmap_level_sizes(bld, ilevel,
2523 &size,
2524 &row_stride_vec, &img_stride_vec);
2525 lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
2526 size, &width, &height, &depth);
2527
2528 if (target == PIPE_TEXTURE_1D_ARRAY ||
2529 target == PIPE_TEXTURE_2D_ARRAY) {
2530 if (out_of_bound_ret_zero) {
2531 z = lp_build_layer_coord(bld, texture_unit, FALSE, z, &out1);
2532 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2533 }
2534 else {
2535 z = lp_build_layer_coord(bld, texture_unit, FALSE, z, NULL);
2536 }
2537 }
2538
2539 /* This is a lot like border sampling */
2540 if (offsets[0]) {
2541 /*
2542 * coords are really unsigned, offsets are signed, but I don't think
2543 * exceeding 31 bits is possible
2544 */
2545 x = lp_build_add(int_coord_bld, x, offsets[0]);
2546 }
2547 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
2548 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2549 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
2550 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2551
2552 if (dims >= 2) {
2553 if (offsets[1]) {
2554 y = lp_build_add(int_coord_bld, y, offsets[1]);
2555 }
2556 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
2557 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2558 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
2559 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2560
2561 if (dims >= 3) {
2562 if (offsets[2]) {
2563 z = lp_build_add(int_coord_bld, z, offsets[2]);
2564 }
2565 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
2566 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2567 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
2568 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2569 }
2570 }
2571
2572 lp_build_sample_offset(int_coord_bld,
2573 bld->format_desc,
2574 x, y, z, row_stride_vec, img_stride_vec,
2575 &offset, &i, &j);
2576
2577 if (bld->static_texture_state->target != PIPE_BUFFER) {
2578 offset = lp_build_add(int_coord_bld, offset,
2579 lp_build_get_mip_offsets(bld, ilevel));
2580 }
2581
2582 offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);
2583
2584 lp_build_fetch_rgba_soa(bld->gallivm,
2585 bld->format_desc,
2586 bld->texel_type, TRUE,
2587 bld->base_ptr, offset,
2588 i, j,
2589 bld->cache,
2590 colors_out);
2591
2592 if (out_of_bound_ret_zero) {
2593 /*
2594 * Only needed for ARB_robust_buffer_access_behavior and d3d10.
2595 * Could use min/max above instead of out-of-bounds comparisons
2596 * if we don't care about the result returned for out-of-bounds.
2597 */
2598 for (chan = 0; chan < 4; chan++) {
2599 colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
2600 bld->texel_bld.zero, colors_out[chan]);
2601 }
2602 }
2603 }
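
#if 0
/*
 * Illustrative scalar sketch of the robust out-of-bounds handling above
 * (hypothetical helper, not part of gallivm):
 */
static float
example_robust_fetch(int x, int width, boolean oob_ret_zero,
                     const float *texels)
{
   boolean oob = x < 0 || x >= width;
   int offset = oob ? 0 : x;          /* andnot: force a safe address */
   float texel = texels[offset];
   if (oob_ret_zero && oob)
      texel = 0.0f;                   /* d3d10 / robust access: return zero */
   return texel;
}
#endif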
2604
2605
2606 /**
2607 * Just set texels to white instead of actually sampling the texture.
2608 * For debugging.
2609 */
2610 void
2611 lp_build_sample_nop(struct gallivm_state *gallivm,
2612 struct lp_type type,
2613 const LLVMValueRef *coords,
2614 LLVMValueRef texel_out[4])
2615 {
2616 LLVMValueRef one = lp_build_one(gallivm, type);
2617 unsigned chan;
2618
2619 for (chan = 0; chan < 4; chan++) {
2620 texel_out[chan] = one;
2621 }
2622 }
2623
2624
2625 /**
2626 * Build the actual texture sampling code.
2627 * 'texel' will return a vector of four LLVMValueRefs corresponding to
2628 * R, G, B, A.
2629 * \param type vector float type to use for coords, etc.
2630 * \param sample_key
2631 * \param derivs partial derivatives of (s,t,r,q) with respect to x and y
2632 */
2633 static void
2634 lp_build_sample_soa_code(struct gallivm_state *gallivm,
2635 const struct lp_static_texture_state *static_texture_state,
2636 const struct lp_static_sampler_state *static_sampler_state,
2637 struct lp_sampler_dynamic_state *dynamic_state,
2638 struct lp_type type,
2639 unsigned sample_key,
2640 unsigned texture_index,
2641 unsigned sampler_index,
2642 LLVMValueRef context_ptr,
2643 LLVMValueRef thread_data_ptr,
2644 const LLVMValueRef *coords,
2645 const LLVMValueRef *offsets,
2646 const struct lp_derivatives *derivs, /* optional */
2647 LLVMValueRef lod, /* optional */
2648 LLVMValueRef texel_out[4])
2649 {
2650 unsigned target = static_texture_state->target;
2651 unsigned dims = texture_dims(target);
2652 unsigned num_quads = type.length / 4;
2653 unsigned mip_filter, min_img_filter, mag_img_filter, i;
2654 struct lp_build_sample_context bld;
2655 struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
2656 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
2657 LLVMBuilderRef builder = gallivm->builder;
2658 LLVMValueRef tex_width, newcoords[5];
2659 enum lp_sampler_lod_property lod_property;
2660 enum lp_sampler_lod_control lod_control;
2661 enum lp_sampler_op_type op_type;
2662 LLVMValueRef lod_bias = NULL;
2663 LLVMValueRef explicit_lod = NULL;
2664 boolean op_is_tex, op_is_lodq, op_is_gather;
2665
2666 if (0) {
2667 enum pipe_format fmt = static_texture_state->format;
2668 debug_printf("Sample from %s\n", util_format_name(fmt));
2669 }
2670
2671 lod_property = (sample_key & LP_SAMPLER_LOD_PROPERTY_MASK) >>
2672 LP_SAMPLER_LOD_PROPERTY_SHIFT;
2673 lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
2674 LP_SAMPLER_LOD_CONTROL_SHIFT;
2675 op_type = (sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
2676 LP_SAMPLER_OP_TYPE_SHIFT;
2677
2678 op_is_tex = op_type == LP_SAMPLER_OP_TEXTURE;
2679 op_is_lodq = op_type == LP_SAMPLER_OP_LODQ;
2680 op_is_gather = op_type == LP_SAMPLER_OP_GATHER;
2681
2682 if (lod_control == LP_SAMPLER_LOD_BIAS) {
2683 lod_bias = lod;
2684 assert(lod);
2685 assert(derivs == NULL);
2686 }
2687 else if (lod_control == LP_SAMPLER_LOD_EXPLICIT) {
2688 explicit_lod = lod;
2689 assert(lod);
2690 assert(derivs == NULL);
2691 }
2692 else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
2693 assert(derivs);
2694 assert(lod == NULL);
2695 }
2696 else {
2697 assert(derivs == NULL);
2698 assert(lod == NULL);
2699 }
2700
2701 if (static_texture_state->format == PIPE_FORMAT_NONE) {
2702 /*
2703 * If there's nothing bound, format is NONE, and we must return
2704 * all zero as mandated by d3d10 in this case.
2705 */
2706 unsigned chan;
2707 LLVMValueRef zero = lp_build_zero(gallivm, type);
2708 for (chan = 0; chan < 4; chan++) {
2709 texel_out[chan] = zero;
2710 }
2711 return;
2712 }
2713
2714 assert(type.floating);
2715
2716 /* Setup our build context */
2717 memset(&bld, 0, sizeof bld);
2718 bld.gallivm = gallivm;
2719 bld.context_ptr = context_ptr;
2720 bld.static_sampler_state = &derived_sampler_state;
2721 bld.static_texture_state = static_texture_state;
2722 bld.dynamic_state = dynamic_state;
2723 bld.format_desc = util_format_description(static_texture_state->format);
2724 bld.dims = dims;
2725
2726 if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD || op_is_lodq) {
2727 bld.no_quad_lod = TRUE;
2728 }
2729 if (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX || op_is_lodq) {
2730 bld.no_rho_approx = TRUE;
2731 }
2732 if (gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR || op_is_lodq) {
2733 bld.no_brilinear = TRUE;
2734 }
2735
2736 bld.vector_width = lp_type_width(type);
2737
2738 bld.float_type = lp_type_float(32);
2739 bld.int_type = lp_type_int(32);
2740 bld.coord_type = type;
2741 bld.int_coord_type = lp_int_type(type);
2742 bld.float_size_in_type = lp_type_float(32);
2743 bld.float_size_in_type.length = dims > 1 ? 4 : 1;
2744 bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
2745 bld.texel_type = type;
2746
2747    /* Always using the first channel should hopefully be safe;
2748     * if not, things WILL break in other places anyway.
2749 */
2750 if (bld.format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
2751 bld.format_desc->channel[0].pure_integer) {
2752 if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
2753 bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
2754 }
2755 else if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
2756 bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length);
2757 }
2758 }
2759 else if (util_format_has_stencil(bld.format_desc) &&
2760 !util_format_has_depth(bld.format_desc)) {
2761 /* for stencil only formats, sample stencil (uint) */
2762       bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length);
2763 }
2764
2765 if (!static_texture_state->level_zero_only ||
2766 !static_sampler_state->max_lod_pos || op_is_lodq) {
2767 derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
2768 } else {
2769 derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
2770 }
2771 if (op_is_gather) {
2772 /*
2773        * gather4 is exactly like GL_LINEAR filtering except that the actual
2774        * filtering is skipped at the end. It mostly uses the same paths, so
2775        * cube face selection, coord wrapping etc. all naturally use the same code.
2776 */
2777 derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
2778 derived_sampler_state.min_img_filter = PIPE_TEX_FILTER_LINEAR;
2779 derived_sampler_state.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
2780 }
2781 mip_filter = derived_sampler_state.min_mip_filter;
2782
2783 if (0) {
2784 debug_printf(" .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter);
2785 }
2786
2787 if (static_texture_state->target == PIPE_TEXTURE_CUBE ||
2788 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY)
2789 {
2790 /*
2791 * Seamless filtering ignores wrap modes.
2792         * Setting to CLAMP_TO_EDGE is correct for nearest filtering; for
2793         * bilinear it's not correct, but way better than using for instance repeat.
2794         * Note we even set this for non-seamless. Technically GL allows any wrap
2795         * mode, which made sense when supporting true borders (can get a seamless
2796         * effect with border and CLAMP_TO_BORDER), but gallium doesn't support
2797         * borders, d3d9 requires wrap modes to be ignored, and it's a pain to fix
2798         * up the sampler state (as it makes it texture dependent).
2799 */
2800 derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2801 derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2802 }
2803 /*
2804 * We could force CLAMP to CLAMP_TO_EDGE here if min/mag filter is nearest,
2805 * so AoS path could be used. Not sure it's worth the trouble...
2806 */
2807
2808 min_img_filter = derived_sampler_state.min_img_filter;
2809 mag_img_filter = derived_sampler_state.mag_img_filter;
2810
2811
2812 /*
2813     * This is all a bit complicated; different paths are chosen for
2814     * performance reasons.
2815     * Essentially, there can be 1 lod per element, 1 lod per quad, or 1 lod
2816     * for everything (the last two options are equivalent in the 4-wide case).
2817     * If there's per-quad lod but we split to 4-wide so we can use AoS, the
2818     * per-quad lod is calculated and the lod value extracted afterwards, which
2819     * makes this case, as far as lod handling in the subsequent sample/filter
2820     * code is concerned, basically the same as the "1 lod for everything" case.
2821     * Different lod handling mostly shows up when building mipmap sizes
2822     * (lp_build_mipmap_level_sizes() and friends) and also in filtering
2823     * (getting the fractional part of the lod to the right texels).
2824 */
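   /*
    * For instance (illustrative): with type.length == 8, per-element lod
    * gives num_lods == 8, per-quad lod gives num_lods == 2 (8 / 4), and a
    * single lod for everything gives num_lods == 1.
    */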
2825
2826 /*
2827 * There are other situations where at least the multiple int lods could be
2828 * avoided like min and max lod being equal.
2829 */
2830 bld.num_mips = bld.num_lods = 1;
2831
2832 if (bld.no_quad_lod && bld.no_rho_approx &&
2833 ((mip_filter != PIPE_TEX_MIPFILTER_NONE && op_is_tex &&
2834 (static_texture_state->target == PIPE_TEXTURE_CUBE ||
2835 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY)) ||
2836 op_is_lodq)) {
2837 /*
2838 * special case for using per-pixel lod even for implicit lod,
2839 * which is generally never required (ok by APIs) except to please
2840 * some (somewhat broken imho) tests (because per-pixel face selection
2841 * can cause derivatives to be different for pixels outside the primitive
2842        * due to the major axis division even if pre-projection derivatives
2843        * look normal).
2844        * For lodq, we do it simply to avoid scalar pack / unpack (albeit for
2845        * cube maps we do indeed get per-pixel lod values).
2846 */
2847 bld.num_mips = type.length;
2848 bld.num_lods = type.length;
2849 }
2850    else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
2851 (explicit_lod || lod_bias || derivs)) {
2852 if ((!op_is_tex && target != PIPE_BUFFER) ||
2853 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2854 bld.num_mips = type.length;
2855 bld.num_lods = type.length;
2856 }
2857 else if (op_is_tex && min_img_filter != mag_img_filter) {
2858 bld.num_mips = 1;
2859 bld.num_lods = type.length;
2860 }
2861 }
2862    /* TODO: for true scalar_lod we should only use 1 lod value */
2863 else if ((!op_is_tex && explicit_lod && target != PIPE_BUFFER) ||
2864 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2865 bld.num_mips = num_quads;
2866 bld.num_lods = num_quads;
2867 }
2868 else if (op_is_tex && min_img_filter != mag_img_filter) {
2869 bld.num_mips = 1;
2870 bld.num_lods = num_quads;
2871 }
2872
2873
2874 bld.lodf_type = type;
2875 /* we want native vector size to be able to use our intrinsics */
2876 if (bld.num_lods != type.length) {
2877 /* TODO: this currently always has to be per-quad or per-element */
2878 bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
2879 }
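   /*
    * (Illustrative: for type.length == 8 with per-quad lods this pads
    * lodf_type.length from the 2 actual lods up to 4, presumably the native
    * 4-wide vector width the intrinsics expect.)
    */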
2880 bld.lodi_type = lp_int_type(bld.lodf_type);
2881 bld.levelf_type = bld.lodf_type;
2882 if (bld.num_mips == 1) {
2883 bld.levelf_type.length = 1;
2884 }
2885 bld.leveli_type = lp_int_type(bld.levelf_type);
2886 bld.float_size_type = bld.float_size_in_type;
2887 /* Note: size vectors may not be native. They contain minified w/h/d/_ values,
2888     * with per-element lod that is w0/h0/d0/_/w1/h1/d1/_/... so up to 8x4f32 */
2889 if (bld.num_mips > 1) {
2890 bld.float_size_type.length = bld.num_mips == type.length ?
2891 bld.num_mips * bld.float_size_in_type.length :
2892 type.length;
2893 }
2894 bld.int_size_type = lp_int_type(bld.float_size_type);
2895
2896 lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
2897 lp_build_context_init(&bld.float_vec_bld, gallivm, type);
2898 lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
2899 lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
2900 lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
2901 lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
2902 lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
2903 lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
2904 lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
2905 lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
2906 lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
2907 lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
2908 lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
2909 lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);
2910
2911 /* Get the dynamic state */
2912 tex_width = dynamic_state->width(dynamic_state, gallivm,
2913 context_ptr, texture_index);
2914 bld.row_stride_array = dynamic_state->row_stride(dynamic_state, gallivm,
2915 context_ptr, texture_index);
2916 bld.img_stride_array = dynamic_state->img_stride(dynamic_state, gallivm,
2917 context_ptr, texture_index);
2918 bld.base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm,
2919 context_ptr, texture_index);
2920 bld.mip_offsets = dynamic_state->mip_offsets(dynamic_state, gallivm,
2921 context_ptr, texture_index);
2922 /* Note that mip_offsets is an array[level] of offsets to texture images */
2923
2924 if (dynamic_state->cache_ptr && thread_data_ptr) {
2925 bld.cache = dynamic_state->cache_ptr(dynamic_state, gallivm,
2926 thread_data_ptr, texture_index);
2927 }
2928
2929 /* width, height, depth as single int vector */
2930 if (dims <= 1) {
2931 bld.int_size = tex_width;
2932 }
2933 else {
2934 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
2935 tex_width,
2936 LLVMConstInt(i32t, 0, 0), "");
2937 if (dims >= 2) {
2938 LLVMValueRef tex_height =
2939 dynamic_state->height(dynamic_state, gallivm,
2940 context_ptr, texture_index);
2941 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
2942 tex_height,
2943 LLVMConstInt(i32t, 1, 0), "");
2944 if (dims >= 3) {
2945 LLVMValueRef tex_depth =
2946 dynamic_state->depth(dynamic_state, gallivm, context_ptr,
2947 texture_index);
2948 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
2949 tex_depth,
2950 LLVMConstInt(i32t, 2, 0), "");
2951 }
2952 }
2953 }
2954
2955 for (i = 0; i < 5; i++) {
2956 newcoords[i] = coords[i];
2957 }
2958
2959 if (util_format_is_pure_integer(static_texture_state->format) &&
2960 !util_format_has_depth(bld.format_desc) && op_is_tex &&
2961 (static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR ||
2962 static_sampler_state->min_img_filter == PIPE_TEX_FILTER_LINEAR ||
2963 static_sampler_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
2964 /*
2965        * Bail if impossible filtering is specified (the awkward additional
2966        * depth check is because it is legal in gallium to have things like S8Z24
2967        * here, which would claim to be pure int even though such formats should
2968        * sample the depth component).
2969        * In GL such filters make the texture incomplete; this makes it robust
2970        * against state trackers which set this up regardless (we'd crash in the
2971        * lerp later otherwise).
2972        * At least in some APIs it may be legal to use such filters with lod
2973        * queries and/or gather (at least for gather, d3d10 says only the wrap
2974        * bits are really used, hence the filter bits are likely simply ignored).
2975        * For fetch, we don't get valid samplers either way here.
2976 */
2977 unsigned chan;
2978 LLVMValueRef zero = lp_build_zero(gallivm, type);
2979 for (chan = 0; chan < 4; chan++) {
2980 texel_out[chan] = zero;
2981 }
2982 return;
2983 }
2984
2985 if (0) {
2986 /* For debug: no-op texture sampling */
2987 lp_build_sample_nop(gallivm,
2988 bld.texel_type,
2989 newcoords,
2990 texel_out);
2991 }
2992
2993 else if (op_type == LP_SAMPLER_OP_FETCH) {
2994 lp_build_fetch_texel(&bld, texture_index, newcoords,
2995 lod, offsets,
2996 texel_out);
2997 }
2998
2999 else {
3000 LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
3001 LLVMValueRef ilevel0 = NULL, ilevel1 = NULL, lod = NULL;
3002 boolean use_aos;
3003
3004 use_aos = util_format_fits_8unorm(bld.format_desc) &&
3005 op_is_tex &&
3006 /* not sure this is strictly needed or simply impossible */
3007 derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE &&
3008 lp_is_simple_wrap_mode(derived_sampler_state.wrap_s);
3009
3010 use_aos &= bld.num_lods <= num_quads ||
3011 derived_sampler_state.min_img_filter ==
3012 derived_sampler_state.mag_img_filter;
3013 if (dims > 1) {
3014 use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t);
3015 if (dims > 2) {
3016 use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r);
3017 }
3018 }
3019 if ((static_texture_state->target == PIPE_TEXTURE_CUBE ||
3020 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
3021 derived_sampler_state.seamless_cube_map &&
3022 (derived_sampler_state.min_img_filter == PIPE_TEX_FILTER_LINEAR ||
3023 derived_sampler_state.mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
3024 /* theoretically possible with AoS filtering but not implemented (complex!) */
3025 use_aos = 0;
3026 }
3027
3028 if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
3029 !use_aos && util_format_fits_8unorm(bld.format_desc)) {
3030 debug_printf("%s: using floating point linear filtering for %s\n",
3031 __FUNCTION__, bld.format_desc->short_name);
3032 debug_printf(" min_img %d mag_img %d mip %d target %d seamless %d"
3033 " wraps %d wrapt %d wrapr %d\n",
3034 derived_sampler_state.min_img_filter,
3035 derived_sampler_state.mag_img_filter,
3036 derived_sampler_state.min_mip_filter,
3037 static_texture_state->target,
3038 derived_sampler_state.seamless_cube_map,
3039 derived_sampler_state.wrap_s,
3040 derived_sampler_state.wrap_t,
3041 derived_sampler_state.wrap_r);
3042 }
3043
3044 lp_build_sample_common(&bld, op_is_lodq, texture_index, sampler_index,
3045 newcoords,
3046 derivs, lod_bias, explicit_lod,
3047 &lod_positive, &lod, &lod_fpart,
3048 &ilevel0, &ilevel1);
3049
3050 if (op_is_lodq) {
3051 texel_out[0] = lod_fpart;
3052 texel_out[1] = lod;
3053 texel_out[2] = texel_out[3] = bld.coord_bld.zero;
3054 return;
3055 }
3056
3057 if (use_aos && static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
3058 /* The aos path doesn't do seamless filtering so simply add cube layer
3059 * to face now.
3060 */
3061 newcoords[2] = lp_build_add(&bld.int_coord_bld, newcoords[2], newcoords[3]);
3062 }
3063
3064 /*
3065 * we only try 8-wide sampling with soa, or with aos if we have AVX2,
3066 * as it appears to be a loss with just AVX
3067 */
3068 if (num_quads == 1 || !use_aos ||
3069 (util_cpu_caps.has_avx2 &&
3070 (bld.num_lods == 1 ||
3071 derived_sampler_state.min_img_filter == derived_sampler_state.mag_img_filter))) {
3072 if (use_aos) {
3073 /* do sampling/filtering with fixed pt arithmetic */
3074 lp_build_sample_aos(&bld, sampler_index,
3075 newcoords[0], newcoords[1],
3076 newcoords[2],
3077 offsets, lod_positive, lod_fpart,
3078 ilevel0, ilevel1,
3079 texel_out);
3080 }
3081
3082 else {
3083 lp_build_sample_general(&bld, sampler_index,
3084 op_type == LP_SAMPLER_OP_GATHER,
3085 newcoords, offsets,
3086 lod_positive, lod_fpart,
3087 ilevel0, ilevel1,
3088 texel_out);
3089 }
3090 }
3091 else {
3092 unsigned j;
3093 struct lp_build_sample_context bld4;
3094 struct lp_type type4 = type;
3095 unsigned i;
3096 LLVMValueRef texelout4[4];
3097 LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
3098
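/*
 * Wide (8+ lane) sampling with aos: split the coords into 4-wide (quad)
 * chunks, sample each chunk with a 4-wide build context, then concatenate
 * the partial results below.
 */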
3099 type4.length = 4;
3100
3101 /* Set up our build context */
3102 memset(&bld4, 0, sizeof bld4);
3103 bld4.no_quad_lod = bld.no_quad_lod;
3104 bld4.no_rho_approx = bld.no_rho_approx;
3105 bld4.no_brilinear = bld.no_brilinear;
3106 bld4.gallivm = bld.gallivm;
3107 bld4.context_ptr = bld.context_ptr;
3108 bld4.static_texture_state = bld.static_texture_state;
3109 bld4.static_sampler_state = bld.static_sampler_state;
3110 bld4.dynamic_state = bld.dynamic_state;
3111 bld4.format_desc = bld.format_desc;
3112 bld4.dims = bld.dims;
3113 bld4.row_stride_array = bld.row_stride_array;
3114 bld4.img_stride_array = bld.img_stride_array;
3115 bld4.base_ptr = bld.base_ptr;
3116 bld4.mip_offsets = bld.mip_offsets;
3117 bld4.int_size = bld.int_size;
3118 bld4.cache = bld.cache;
3119
3120 bld4.vector_width = lp_type_width(type4);
3121
3122 bld4.float_type = lp_type_float(32);
3123 bld4.int_type = lp_type_int(32);
3124 bld4.coord_type = type4;
3125 bld4.int_coord_type = lp_int_type(type4);
3126 bld4.float_size_in_type = lp_type_float(32);
3127 bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
3128 bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
3129 bld4.texel_type = bld.texel_type;
3130 bld4.texel_type.length = 4;
3131
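/*
 * num_mips/num_lods == type4.length means per-element mip level and lod
 * respectively; 1 means one shared value for the whole (4-wide) vector.
 */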
3132 bld4.num_mips = bld4.num_lods = 1;
3133 if (bld4.no_quad_lod && bld4.no_rho_approx &&
3134 (static_texture_state->target == PIPE_TEXTURE_CUBE ||
3135 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
3136 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
3137 bld4.num_mips = type4.length;
3138 bld4.num_lods = type4.length;
3139 }
3140 if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
3141 (explicit_lod || lod_bias || derivs)) {
3142 if ((!op_is_tex && target != PIPE_BUFFER) ||
3143 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
3144 bld4.num_mips = type4.length;
3145 bld4.num_lods = type4.length;
3146 }
3147 else if (op_is_tex && min_img_filter != mag_img_filter) {
3148 bld4.num_mips = 1;
3149 bld4.num_lods = type4.length;
3150 }
3151 }
3152
3153 /* we want native vector size to be able to use our intrinsics */
3154 bld4.lodf_type = type4;
3155 if (bld4.num_lods != type4.length) {
3156 bld4.lodf_type.length = 1;
3157 }
3158 bld4.lodi_type = lp_int_type(bld4.lodf_type);
3159 bld4.levelf_type = type4;
3160 if (bld4.num_mips != type4.length) {
3161 bld4.levelf_type.length = 1;
3162 }
3163 bld4.leveli_type = lp_int_type(bld4.levelf_type);
3164 bld4.float_size_type = bld4.float_size_in_type;
3165 if (bld4.num_mips > 1) {
3166 bld4.float_size_type.length = bld4.num_mips == type4.length ?
3167 bld4.num_mips * bld4.float_size_in_type.length :
3168 type4.length;
3169 }
3170 bld4.int_size_type = lp_int_type(bld4.float_size_type);
3171
3172 lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
3173 lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
3174 lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
3175 lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
3176 lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
3177 lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
3178 lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
3179 lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
3180 lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
3181 lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
3182 lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
3183 lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
3184 lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
3185 lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);
3186
3187 for (i = 0; i < num_quads; i++) {
3188 LLVMValueRef s4, t4, r4;
3189 LLVMValueRef lod_positive4, lod_fpart4 = NULL;
3190 LLVMValueRef ilevel04, ilevel14 = NULL;
3191 LLVMValueRef offsets4[4] = { NULL };
3192 unsigned num_lods = bld4.num_lods;
3193
3194 s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4);
3195 t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4);
3196 r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4);
3197
3198 if (offsets[0]) {
3199 offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4);
3200 if (dims > 1) {
3201 offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4);
3202 if (dims > 2) {
3203 offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4);
3204 }
3205 }
3206 }
3207 lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods);
3208 ilevel04 = bld.num_mips == 1 ? ilevel0 :
3209 lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
3210 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
3211 ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
3212 lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
3213 }
3214
3215 if (use_aos) {
3216 /* do sampling/filtering with fixed pt arithmetic */
3217 lp_build_sample_aos(&bld4, sampler_index,
3218 s4, t4, r4, offsets4,
3219 lod_positive4, lod_fpart4,
3220 ilevel04, ilevel14,
3221 texelout4);
3222 }
3223
3224 else {
3225 /* this path is currently unreachable and hence might break easily... */
3226 LLVMValueRef newcoords4[5];
3227 newcoords4[0] = s4;
3228 newcoords4[1] = t4;
3229 newcoords4[2] = r4;
3230 newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4);
3231 newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4);
3232
3233 lp_build_sample_general(&bld4, sampler_index,
3234 op_type == LP_SAMPLER_OP_GATHER,
3235 newcoords4, offsets4,
3236 lod_positive4, lod_fpart4,
3237 ilevel04, ilevel14,
3238 texelout4);
3239 }
3240 for (j = 0; j < 4; j++) {
3241 texelouttmp[j][i] = texelout4[j];
3242 }
3243 }
3244
3245 for (j = 0; j < 4; j++) {
3246 texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
3247 }
3248 }
3249 }
3250
3251 if (target != PIPE_BUFFER && op_type != LP_SAMPLER_OP_GATHER) {
3252 apply_sampler_swizzle(&bld, texel_out);
3253 }
3254
3255 /*
3256 * texel type can be a (32bit) int/uint (for pure int formats only),
3257 * however we are expected to always return floats (storage is untyped).
3258 */
3259 if (!bld.texel_type.floating) {
3260 unsigned chan;
3261 for (chan = 0; chan < 4; chan++) {
3262 texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan],
3263 lp_build_vec_type(gallivm, type), "");
3264 }
3265 }
3266 }
3267
3268
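/*
 * Emit sampling through a dedicated (cached, fastcc) function rather than
 * always inlining the sampling code; see lp_build_sample_soa() below for
 * when the call is actually used.
 */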
3269 #define USE_TEX_FUNC_CALL 1
3270
3271 #define LP_MAX_TEX_FUNC_ARGS 32
3272
3273 static inline void
3274 get_target_info(enum pipe_texture_target target,
3275 unsigned *num_coords, unsigned *num_derivs,
3276 unsigned *num_offsets, unsigned *layer)
3277 {
3278 unsigned dims = texture_dims(target);
3279 *num_coords = dims;
3280 *num_offsets = dims;
3281 *num_derivs = (target == PIPE_TEXTURE_CUBE ||
3282 target == PIPE_TEXTURE_CUBE_ARRAY) ? 3 : dims;
3283 *layer = has_layer_coord(target) ? 2 : 0;
3284 if (target == PIPE_TEXTURE_CUBE_ARRAY) {
3285 /*
3286 * dims doesn't include the r coord for cubes - this is handled
3287 * by layer instead, but we need to fix it up for cube arrays...
3288 */
3289 *layer = 3;
3290 *num_coords = 3;
3291 }
3292 }
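/*
 * For example, per the logic above, PIPE_TEXTURE_2D_ARRAY yields
 * num_coords = 2, num_offsets = 2, num_derivs = 2, layer = 2, while
 * PIPE_TEXTURE_CUBE_ARRAY yields num_coords = 3, num_offsets = 2,
 * num_derivs = 3, layer = 3.
 */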
3293
3294
3295 /**
3296 * Generate the function body for a texture sampling function.
3297 */
3298 static void
3299 lp_build_sample_gen_func(struct gallivm_state *gallivm,
3300 const struct lp_static_texture_state *static_texture_state,
3301 const struct lp_static_sampler_state *static_sampler_state,
3302 struct lp_sampler_dynamic_state *dynamic_state,
3303 struct lp_type type,
3304 unsigned texture_index,
3305 unsigned sampler_index,
3306 LLVMValueRef function,
3307 unsigned num_args,
3308 unsigned sample_key)
3309 {
3310 LLVMBuilderRef old_builder;
3311 LLVMBasicBlockRef block;
3312 LLVMValueRef coords[5];
3313 LLVMValueRef offsets[3] = { NULL };
3314 LLVMValueRef lod = NULL;
3315 LLVMValueRef context_ptr;
3316 LLVMValueRef thread_data_ptr = NULL;
3317 LLVMValueRef texel_out[4];
3318 struct lp_derivatives derivs;
3319 struct lp_derivatives *deriv_ptr = NULL;
3320 unsigned num_param = 0;
3321 unsigned i, num_coords, num_derivs, num_offsets, layer;
3322 enum lp_sampler_lod_control lod_control;
3323 boolean need_cache = FALSE;
3324
3325 lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
3326 LP_SAMPLER_LOD_CONTROL_SHIFT;
3327
3328 get_target_info(static_texture_state->target,
3329 &num_coords, &num_derivs, &num_offsets, &layer);
3330
3331 if (dynamic_state->cache_ptr) {
3332 const struct util_format_description *format_desc;
3333 format_desc = util_format_description(static_texture_state->format);
3334 if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
3335 need_cache = TRUE;
3336 }
3337 }
3338
3339 /* "unpack" arguments */
3340 context_ptr = LLVMGetParam(function, num_param++);
3341 if (need_cache) {
3342 thread_data_ptr = LLVMGetParam(function, num_param++);
3343 }
3344 for (i = 0; i < num_coords; i++) {
3345 coords[i] = LLVMGetParam(function, num_param++);
3346 }
3347 for (i = num_coords; i < 5; i++) {
3348 /* This is rather unfortunate... */
3349 coords[i] = lp_build_undef(gallivm, type);
3350 }
3351 if (layer) {
3352 coords[layer] = LLVMGetParam(function, num_param++);
3353 }
3354 if (sample_key & LP_SAMPLER_SHADOW) {
3355 coords[4] = LLVMGetParam(function, num_param++);
3356 }
3357 if (sample_key & LP_SAMPLER_OFFSETS) {
3358 for (i = 0; i < num_offsets; i++) {
3359 offsets[i] = LLVMGetParam(function, num_param++);
3360 }
3361 }
3362 if (lod_control == LP_SAMPLER_LOD_BIAS ||
3363 lod_control == LP_SAMPLER_LOD_EXPLICIT) {
3364 lod = LLVMGetParam(function, num_param++);
3365 }
3366 else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
3367 for (i = 0; i < num_derivs; i++) {
3368 derivs.ddx[i] = LLVMGetParam(function, num_param++);
3369 derivs.ddy[i] = LLVMGetParam(function, num_param++);
3370 }
3371 deriv_ptr = &derivs;
3372 }
3373
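/* The parameter order here must match the argument packing in
 * lp_build_sample_soa_func().
 */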
3374 assert(num_args == num_param);
3375
3376 /*
3377 * Function body
3378 */
3379
3380 old_builder = gallivm->builder;
3381 block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
3382 gallivm->builder = LLVMCreateBuilderInContext(gallivm->context);
3383 LLVMPositionBuilderAtEnd(gallivm->builder, block);
3384
3385 lp_build_sample_soa_code(gallivm,
3386 static_texture_state,
3387 static_sampler_state,
3388 dynamic_state,
3389 type,
3390 sample_key,
3391 texture_index,
3392 sampler_index,
3393 context_ptr,
3394 thread_data_ptr,
3395 coords,
3396 offsets,
3397 deriv_ptr,
3398 lod,
3399 texel_out);
3400
3401 LLVMBuildAggregateRet(gallivm->builder, texel_out, 4);
3402
3403 LLVMDisposeBuilder(gallivm->builder);
3404 gallivm->builder = old_builder;
3405
3406 gallivm_verify_function(gallivm, function);
3407 }
3408
3409
3410 /**
3411 * Call the matching function for texture sampling.
3412 * If there's no match, generate a new one.
3413 */
3414 static void
3415 lp_build_sample_soa_func(struct gallivm_state *gallivm,
3416 const struct lp_static_texture_state *static_texture_state,
3417 const struct lp_static_sampler_state *static_sampler_state,
3418 struct lp_sampler_dynamic_state *dynamic_state,
3419 const struct lp_sampler_params *params)
3420 {
3421 LLVMBuilderRef builder = gallivm->builder;
3422 LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(
3423 LLVMGetInsertBlock(builder)));
3424 LLVMValueRef function, inst;
3425 LLVMValueRef args[LP_MAX_TEX_FUNC_ARGS];
3426 LLVMBasicBlockRef bb;
3427 LLVMValueRef tex_ret;
3428 unsigned num_args = 0;
3429 char func_name[64];
3430 unsigned i, num_coords, num_derivs, num_offsets, layer;
3431 unsigned texture_index = params->texture_index;
3432 unsigned sampler_index = params->sampler_index;
3433 unsigned sample_key = params->sample_key;
3434 const LLVMValueRef *coords = params->coords;
3435 const LLVMValueRef *offsets = params->offsets;
3436 const struct lp_derivatives *derivs = params->derivs;
3437 enum lp_sampler_lod_control lod_control;
3438 boolean need_cache = FALSE;
3439
3440 lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
3441 LP_SAMPLER_LOD_CONTROL_SHIFT;
3442
3443 get_target_info(static_texture_state->target,
3444 &num_coords, &num_derivs, &num_offsets, &layer);
3445
3446 if (dynamic_state->cache_ptr) {
3447 const struct util_format_description *format_desc;
3448 format_desc = util_format_description(static_texture_state->format);
3449 if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
3450 /*
3451 * This is not 100% correct: if we have a cache but
3452 * util_format_s3tc_prefer is true, the cache won't get used
3453 * regardless (could hook up the block decode there...) */
3454 need_cache = TRUE;
3455 }
3456 }
3457 /*
3458 * texture function matches are found by name.
3459 * Thus the name has to include both the texture and sampler unit
3460 * (which covers all static state) plus the actual texture function
3461 * (including things like offsets, shadow coord, lod control).
3462 * Additionally, lod_property has to be included.
3463 */
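/*
 * E.g. texture unit 2, sampler unit 1 and a sample_key of 0x40 would give
 * "texfunc_res_2_sam_1_40" (the sample_key value is purely illustrative).
 */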
3464
3465 util_snprintf(func_name, sizeof(func_name), "texfunc_res_%d_sam_%d_%x",
3466 texture_index, sampler_index, sample_key);
3467
3468 function = LLVMGetNamedFunction(module, func_name);
3469
3470 if (!function) {
3471 LLVMTypeRef arg_types[LP_MAX_TEX_FUNC_ARGS];
3472 LLVMTypeRef ret_type;
3473 LLVMTypeRef function_type;
3474 LLVMTypeRef val_type[4];
3475 unsigned num_param = 0;
3476
3477 /*
3478 * Generate the function prototype.
3479 */
3480
3481 arg_types[num_param++] = LLVMTypeOf(params->context_ptr);
3482 if (need_cache) {
3483 arg_types[num_param++] = LLVMTypeOf(params->thread_data_ptr);
3484 }
3485 for (i = 0; i < num_coords; i++) {
3486 arg_types[num_param++] = LLVMTypeOf(coords[0]);
3487 assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[i]));
3488 }
3489 if (layer) {
3490 arg_types[num_param++] = LLVMTypeOf(coords[layer]);
3491 assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[layer]));
3492 }
3493 if (sample_key & LP_SAMPLER_SHADOW) {
3494 arg_types[num_param++] = LLVMTypeOf(coords[0]);
3495 }
3496 if (sample_key & LP_SAMPLER_OFFSETS) {
3497 for (i = 0; i < num_offsets; i++) {
3498 arg_types[num_param++] = LLVMTypeOf(offsets[0]);
3499 assert(LLVMTypeOf(offsets[0]) == LLVMTypeOf(offsets[i]));
3500 }
3501 }
3502 if (lod_control == LP_SAMPLER_LOD_BIAS ||
3503 lod_control == LP_SAMPLER_LOD_EXPLICIT) {
3504 arg_types[num_param++] = LLVMTypeOf(params->lod);
3505 }
3506 else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
3507 for (i = 0; i < num_derivs; i++) {
3508 arg_types[num_param++] = LLVMTypeOf(derivs->ddx[i]);
3509 arg_types[num_param++] = LLVMTypeOf(derivs->ddy[i]);
3510 assert(LLVMTypeOf(derivs->ddx[0]) == LLVMTypeOf(derivs->ddx[i]));
3511 assert(LLVMTypeOf(derivs->ddy[0]) == LLVMTypeOf(derivs->ddy[i]));
3512 }
3513 }
3514
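/* The function returns the four texel channel vectors as a single struct. */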
3515 val_type[0] = val_type[1] = val_type[2] = val_type[3] =
3516 lp_build_vec_type(gallivm, params->type);
3517 ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
3518 function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0);
3519 function = LLVMAddFunction(module, func_name, function_type);
3520
3521 for (i = 0; i < num_param; ++i) {
3522 if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
3523 lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
3525 }
3526 }
3527
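/* Not visible outside the module, and called with the fast convention
 * from lp_build_sample_soa_func().
 */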
3528 LLVMSetFunctionCallConv(function, LLVMFastCallConv);
3529 LLVMSetLinkage(function, LLVMInternalLinkage);
3530
3531 lp_build_sample_gen_func(gallivm,
3532 static_texture_state,
3533 static_sampler_state,
3534 dynamic_state,
3535 params->type,
3536 texture_index,
3537 sampler_index,
3538 function,
3539 num_param,
3540 sample_key);
3541 }
3542
3543 num_args = 0;
3544 args[num_args++] = params->context_ptr;
3545 if (need_cache) {
3546 args[num_args++] = params->thread_data_ptr;
3547 }
3548 for (i = 0; i < num_coords; i++) {
3549 args[num_args++] = coords[i];
3550 }
3551 if (layer) {
3552 args[num_args++] = coords[layer];
3553 }
3554 if (sample_key & LP_SAMPLER_SHADOW) {
3555 args[num_args++] = coords[4];
3556 }
3557 if (sample_key & LP_SAMPLER_OFFSETS) {
3558 for (i = 0; i < num_offsets; i++) {
3559 args[num_args++] = offsets[i];
3560 }
3561 }
3562 if (lod_control == LP_SAMPLER_LOD_BIAS ||
3563 lod_control == LP_SAMPLER_LOD_EXPLICIT) {
3564 args[num_args++] = params->lod;
3565 }
3566 else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
3567 for (i = 0; i < num_derivs; i++) {
3568 args[num_args++] = derivs->ddx[i];
3569 args[num_args++] = derivs->ddy[i];
3570 }
3571 }
3572
3573 assert(num_args <= LP_MAX_TEX_FUNC_ARGS);
3574
3575 tex_ret = LLVMBuildCall(builder, function, args, num_args, "");
3576 bb = LLVMGetInsertBlock(builder);
3577 inst = LLVMGetLastInstruction(bb);
3578 LLVMSetInstructionCallConv(inst, LLVMFastCallConv);
3579
3580 for (i = 0; i < 4; i++) {
3581 params->texel[i] = LLVMBuildExtractValue(gallivm->builder, tex_ret, i, "");
3582 }
3583 }
3584
3585
3586 /**
3587 * Build texture sampling code.
3588 * Either via a function call or inline it directly.
3589 */
3590 void
3591 lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state,
3592 const struct lp_static_sampler_state *static_sampler_state,
3593 struct lp_sampler_dynamic_state *dynamic_state,
3594 struct gallivm_state *gallivm,
3595 const struct lp_sampler_params *params)
3596 {
3597 boolean use_tex_func = FALSE;
3598
3599 /*
3600 * Do not use a function call if the sampling is "simple enough".
3601 * We define this by
3602 * a) format
3603 * b) no mips (either one level only or no mip filter)
3604 * No mips will definitely make the code smaller, though
3605 * the format requirement is a bit iffy - there are some (SoA) formats
3606 * which definitely generate less code. This does happen to catch
3607 * some important cases, though, which are hurt quite a bit by using
3608 * a call (not really because of the call overhead, but because
3609 * they reuse the same texture unit with some of the same
3610 * parameters).
3611 * Ideally we'd let llvm recognize this stuff by doing IPO passes.
3612 */
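/*
 * E.g. a PIPE_FORMAT_B8G8R8A8_UNORM texture with a single level and equal
 * min/mag filters gets inlined, whereas e.g. an S3TC texture or one with
 * a real mip filter goes through the generated function.
 */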
3613
3614 if (USE_TEX_FUNC_CALL) {
3615 const struct util_format_description *format_desc;
3616 boolean simple_format;
3617 boolean simple_tex;
3618 enum lp_sampler_op_type op_type;
3619 format_desc = util_format_description(static_texture_state->format);
3620 simple_format = !format_desc ||
3621 (util_format_is_rgba8_variant(format_desc) &&
3622 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);
3623
3624 op_type = (params->sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
3625 LP_SAMPLER_OP_TYPE_SHIFT;
3626 simple_tex =
3627 op_type != LP_SAMPLER_OP_TEXTURE ||
3628 ((static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE ||
3629 static_texture_state->level_zero_only) &&
3630 static_sampler_state->min_img_filter == static_sampler_state->mag_img_filter);
3631
3632 use_tex_func = format_desc && !(simple_format && simple_tex);
3633 }
3634
3635 if (use_tex_func) {
3636 lp_build_sample_soa_func(gallivm,
3637 static_texture_state,
3638 static_sampler_state,
3639 dynamic_state,
3640 params);
3641 }
3642 else {
3643 lp_build_sample_soa_code(gallivm,
3644 static_texture_state,
3645 static_sampler_state,
3646 dynamic_state,
3647 params->type,
3648 params->sample_key,
3649 params->texture_index,
3650 params->sampler_index,
3651 params->context_ptr,
3652 params->thread_data_ptr,
3653 params->coords,
3654 params->offsets,
3655 params->derivs,
3656 params->lod,
3657 params->texel);
3658 }
3659 }
3660
3661
3662 void
3663 lp_build_size_query_soa(struct gallivm_state *gallivm,
3664 const struct lp_static_texture_state *static_state,
3665 struct lp_sampler_dynamic_state *dynamic_state,
3666 const struct lp_sampler_size_query_params *params)
3667 {
3668 LLVMValueRef lod, level = NULL, size;
3669 LLVMValueRef first_level = NULL;
3670 int dims, i;
3671 boolean has_array;
3672 unsigned num_lods = 1;
3673 struct lp_build_context bld_int_vec4;
3674 LLVMValueRef context_ptr = params->context_ptr;
3675 unsigned texture_unit = params->texture_unit;
3676 unsigned target = params->target;
3677
3678 if (static_state->format == PIPE_FORMAT_NONE) {
3679 /*
3680 * If there's nothing bound, the format is NONE, and we must return
3681 * all zeros, as mandated by d3d10 in this case.
3682 */
3683 unsigned chan;
3684 LLVMValueRef zero = lp_build_const_vec(gallivm, params->int_type, 0.0F);
3685 for (chan = 0; chan < 4; chan++) {
3686 params->sizes_out[chan] = zero;
3687 }
3688 return;
3689 }
3690
3691 /*
3692 * Do some sanity verification about bound texture and shader dcl target.
3693 * Not entirely sure what's possible, but assume array/non-array are
3694 * always compatible (probably not ok for OpenGL, but d3d10 has no
3695 * distinction of arrays at the resource level).
3696 * Everything else looks bogus (though not entirely sure about rect/2d).
3697 * Currently disabled because it causes assertion failures if there's
3698 * nothing bound (or rather a dummy texture, not that this case would
3699 * return the right values).
3700 */
3701 if (0 && static_state->target != target) {
3702 if (static_state->target == PIPE_TEXTURE_1D)
3703 assert(target == PIPE_TEXTURE_1D_ARRAY);
3704 else if (static_state->target == PIPE_TEXTURE_1D_ARRAY)
3705 assert(target == PIPE_TEXTURE_1D);
3706 else if (static_state->target == PIPE_TEXTURE_2D)
3707 assert(target == PIPE_TEXTURE_2D_ARRAY);
3708 else if (static_state->target == PIPE_TEXTURE_2D_ARRAY)
3709 assert(target == PIPE_TEXTURE_2D);
3710 else if (static_state->target == PIPE_TEXTURE_CUBE)
3711 assert(target == PIPE_TEXTURE_CUBE_ARRAY);
3712 else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY)
3713 assert(target == PIPE_TEXTURE_CUBE);
3714 else
3715 assert(0);
3716 }
3717
3718 dims = texture_dims(target);
3719
3720 switch (target) {
3721 case PIPE_TEXTURE_1D_ARRAY:
3722 case PIPE_TEXTURE_2D_ARRAY:
3723 case PIPE_TEXTURE_CUBE_ARRAY:
3724 has_array = TRUE;
3725 break;
3726 default:
3727 has_array = FALSE;
3728 break;
3729 }
3730
3731 assert(!params->int_type.floating);
3732
3733 lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));
3734
3735 if (params->explicit_lod) {
3736 /* FIXME: this needs to honor per-element lod */
3737 lod = LLVMBuildExtractElement(gallivm->builder, params->explicit_lod,
3738 lp_build_const_int32(gallivm, 0), "");
3739 first_level = dynamic_state->first_level(dynamic_state, gallivm,
3740 context_ptr, texture_unit);
3741 level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level");
3742 lod = lp_build_broadcast_scalar(&bld_int_vec4, level);
3743 } else {
3744 lod = bld_int_vec4.zero;
3745 }
3746
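/* Build an int4 size vector: x = width, y = height, z = depth. */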
3747 size = bld_int_vec4.undef;
3748
3749 size = LLVMBuildInsertElement(gallivm->builder, size,
3750 dynamic_state->width(dynamic_state, gallivm,
3751 context_ptr, texture_unit),
3752 lp_build_const_int32(gallivm, 0), "");
3753
3754 if (dims >= 2) {
3755 size = LLVMBuildInsertElement(gallivm->builder, size,
3756 dynamic_state->height(dynamic_state, gallivm,
3757 context_ptr, texture_unit),
3758 lp_build_const_int32(gallivm, 1), "");
3759 }
3760
3761 if (dims >= 3) {
3762 size = LLVMBuildInsertElement(gallivm->builder, size,
3763 dynamic_state->depth(dynamic_state, gallivm,
3764 context_ptr, texture_unit),
3765 lp_build_const_int32(gallivm, 2), "");
3766 }
3767
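/* lp_build_minify computes max(size >> lod, 1) per component. */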
3768 size = lp_build_minify(&bld_int_vec4, size, lod, TRUE);
3769
3770 if (has_array) {
3771 LLVMValueRef layers = dynamic_state->depth(dynamic_state, gallivm,
3772 context_ptr, texture_unit);
3773 if (target == PIPE_TEXTURE_CUBE_ARRAY) {
3774 /*
3775 * It looks like GL wants the number of cubes; d3d10.1 leaves it
3776 * undefined. Could avoid this by passing in the number of cubes
3777 * instead of the total number of layers (easier elsewhere too).
3778 */
3779 LLVMValueRef six = lp_build_const_int32(gallivm, 6);
3780 layers = LLVMBuildSDiv(gallivm->builder, layers, six, "");
3781 }
3782 size = LLVMBuildInsertElement(gallivm->builder, size, layers,
3783 lp_build_const_int32(gallivm, dims), "");
3784 }
3785
3786 /*
3787 * d3d10 requires zero for x/y/z values (but not w, i.e. mip levels)
3788 * if level is out of bounds (note this can't cover unbound texture
3789 * here, which also requires returning zero).
3790 */
3791 if (params->explicit_lod && params->is_sviewinfo) {
3792 LLVMValueRef last_level, out, out1;
3793 struct lp_build_context leveli_bld;
3794
3795 /* everything is scalar for now */
3796 lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32));
3797 last_level = dynamic_state->last_level(dynamic_state, gallivm,
3798 context_ptr, texture_unit);
3799
3800 out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level);
3801 out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level);
3802 out = lp_build_or(&leveli_bld, out, out1);
3803 if (num_lods == 1) {
3804 out = lp_build_broadcast_scalar(&bld_int_vec4, out);
3805 }
3806 else {
3807 /* TODO */
3808 assert(0);
3809 }
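/* out is ~0 where the level is out of bounds; size & ~out zeroes those
 * channels.
 */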
3810 size = lp_build_andnot(&bld_int_vec4, size, out);
3811 }
3812 for (i = 0; i < dims + (has_array ? 1 : 0); i++) {
3813 params->sizes_out[i] = lp_build_extract_broadcast(gallivm, bld_int_vec4.type, params->int_type,
3814 size,
3815 lp_build_const_int32(gallivm, i));
3816 }
3817 if (params->is_sviewinfo) {
3818 for (; i < 4; i++) {
3819 params->sizes_out[i] = lp_build_const_vec(gallivm, params->int_type, 0.0);
3820 }
3821 }
3822
3823 /*
3824 * if there's no explicit_lod (buffers, rects), queries requiring the
3825 * number of mips would be illegal.
3826 */
3827 if (params->is_sviewinfo && params->explicit_lod) {
3828 struct lp_build_context bld_int_scalar;
3829 LLVMValueRef num_levels;
3830 lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32));
3831
3832 if (static_state->level_zero_only) {
3833 num_levels = bld_int_scalar.one;
3834 }
3835 else {
3836 LLVMValueRef last_level;
3837
3838 last_level = dynamic_state->last_level(dynamic_state, gallivm,
3839 context_ptr, texture_unit);
3840 num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level);
3841 num_levels = lp_build_add(&bld_int_scalar, num_levels, bld_int_scalar.one);
3842 }
3843 params->sizes_out[3] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, params->int_type),
3844 num_levels);
3845 }
3846 }