gallivm: (trivial) fix int/uint border color clamping
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Texture sampling -- SoA.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
34 */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "pipe/p_shader_tokens.h"
39 #include "util/u_debug.h"
40 #include "util/u_dump.h"
41 #include "util/u_memory.h"
42 #include "util/u_math.h"
43 #include "util/u_format.h"
44 #include "util/u_cpu_detect.h"
45 #include "util/u_format_rgb9e5.h"
46 #include "lp_bld_debug.h"
47 #include "lp_bld_type.h"
48 #include "lp_bld_const.h"
49 #include "lp_bld_conv.h"
50 #include "lp_bld_arit.h"
51 #include "lp_bld_bitarit.h"
52 #include "lp_bld_logic.h"
53 #include "lp_bld_printf.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_gather.h"
57 #include "lp_bld_format.h"
58 #include "lp_bld_sample.h"
59 #include "lp_bld_sample_aos.h"
60 #include "lp_bld_struct.h"
61 #include "lp_bld_quad.h"
62 #include "lp_bld_pack.h"
63
64
65 /**
66 * Generate code to fetch a texel from a texture at int coords (x, y, z).
67 * The computation depends on whether the texture is 1D, 2D or 3D.
68 * The result, texel, will be float vectors:
69 * texel[0] = red values
70 * texel[1] = green values
71 * texel[2] = blue values
72 * texel[3] = alpha values
73 */
74 static void
75 lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
76 unsigned sampler_unit,
77 LLVMValueRef width,
78 LLVMValueRef height,
79 LLVMValueRef depth,
80 LLVMValueRef x,
81 LLVMValueRef y,
82 LLVMValueRef z,
83 LLVMValueRef y_stride,
84 LLVMValueRef z_stride,
85 LLVMValueRef data_ptr,
86 LLVMValueRef mipoffsets,
87 LLVMValueRef texel_out[4])
88 {
89 const struct lp_static_sampler_state *static_state = bld->static_sampler_state;
90 const unsigned dims = bld->dims;
91 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
92 LLVMBuilderRef builder = bld->gallivm->builder;
93 LLVMValueRef offset;
94 LLVMValueRef i, j;
95 LLVMValueRef use_border = NULL;
96
97    /* use_border = x < 0 || x >= width || y < 0 || y >= height (and likewise for z) */
98 if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
99 static_state->min_img_filter,
100 static_state->mag_img_filter)) {
101 LLVMValueRef b1, b2;
102 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
103 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
104 use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
105 }
106
107 if (dims >= 2 &&
108 lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
109 static_state->min_img_filter,
110 static_state->mag_img_filter)) {
111 LLVMValueRef b1, b2;
112 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
113 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
114 if (use_border) {
115 use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
116 use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
117 }
118 else {
119 use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
120 }
121 }
122
123 if (dims == 3 &&
124 lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
125 static_state->min_img_filter,
126 static_state->mag_img_filter)) {
127 LLVMValueRef b1, b2;
128 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
129 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
130 if (use_border) {
131 use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
132 use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
133 }
134 else {
135 use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
136 }
137 }
138
139 /* convert x,y,z coords to linear offset from start of texture, in bytes */
140 lp_build_sample_offset(&bld->int_coord_bld,
141 bld->format_desc,
142 x, y, z, y_stride, z_stride,
143 &offset, &i, &j);
144 if (mipoffsets) {
145 offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
146 }
147
148 if (use_border) {
149 /* If we can sample the border color, it means that texcoords may
150 * lie outside the bounds of the texture image. We need to do
151 * something to prevent reading out of bounds and causing a segfault.
152 *
153     * Simply AND the computed texel offset with ~use_border. This forces
154     * out-of-bounds offsets to zero, and offset zero is guaranteed to lie
155     * inside the texture image.
156 */
157 offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
158 }
159
160 lp_build_fetch_rgba_soa(bld->gallivm,
161 bld->format_desc,
162 bld->texel_type,
163 data_ptr, offset,
164 i, j,
165 texel_out);
166
167 /*
168 * Note: if we find an app which frequently samples the texture border
169     * we might want to implement a true conditional here, to skip the
170     * texture fetch entirely whenever possible (since that's quite a bit of code).
171 * Ex:
172 * if (use_border) {
173 * texel = border_color;
174 * }
175 * else {
176 * texel = sample_texture(coord);
177 * }
178 * As it is now, we always sample the texture, then selectively replace
179 * the texel color results with the border color.
180 */
181
182 if (use_border) {
183 /* select texel color or border color depending on use_border. */
184 const struct util_format_description *format_desc = bld->format_desc;
185 int chan;
186 struct lp_type border_type = bld->texel_type;
187 border_type.length = 4;
188 /*
189 * Only replace channels which are actually present. The others should
190     * eventually get optimized away by the sampler_view swizzle anyway,
191     * but doing it this way is also simpler.
192 */
193 for (chan = 0; chan < 4; chan++) {
194 unsigned chan_s;
195 /* reverse-map channel... */
196 for (chan_s = 0; chan_s < 4; chan_s++) {
197 if (chan_s == format_desc->swizzle[chan]) {
198 break;
199 }
200 }
201 if (chan_s <= 3) {
202 /* use the already clamped color */
203 LLVMValueRef idx = lp_build_const_int32(bld->gallivm, chan);
204 LLVMValueRef border_chan;
205
206 border_chan = lp_build_extract_broadcast(bld->gallivm,
207 border_type,
208 bld->texel_type,
209 bld->border_color_clamped,
210 idx);
211 texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
212 border_chan, texel_out[chan]);
213 }
214 }
215 }
216 }
217
218
219 /**
220    * Helper to compute the mirror function for the PIPE_TEX_WRAP_MIRROR_* wrap modes.
221 */
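/*
 * In effect this computes (for normalized coords):
 *    mirror(coord) = isOdd(floor(coord)) ? 1 - fract(coord) : fract(coord)
 * e.g. coord =  1.3 -> floor =  1 (odd)  -> 1 - 0.3 = 0.7
 *      coord =  2.3 -> floor =  2 (even) ->           0.3
 *      coord = -0.3 -> floor = -1 (odd)  -> 1 - 0.7 = 0.3
 * so the [0,1] range is traversed back and forth as coord increases.
 */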
222 static LLVMValueRef
223 lp_build_coord_mirror(struct lp_build_sample_context *bld,
224 LLVMValueRef coord)
225 {
226 struct lp_build_context *coord_bld = &bld->coord_bld;
227 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
228 LLVMValueRef fract, flr, isOdd;
229
230 lp_build_ifloor_fract(coord_bld, coord, &flr, &fract);
231
232 /* isOdd = flr & 1 */
233 isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, int_coord_bld->one, "");
234
235 /* make coord positive or negative depending on isOdd */
236 coord = lp_build_set_sign(coord_bld, fract, isOdd);
237
238 /* convert isOdd to float */
239 isOdd = lp_build_int_to_float(coord_bld, isOdd);
240
241 /* add isOdd to coord */
242 coord = lp_build_add(coord_bld, coord, isOdd);
243
244 return coord;
245 }
246
247
248 /**
249 * Helper to compute the first coord and the weight for
250    * linear wrap repeat mode with npot (non-power-of-two) textures.
251 */
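/*
 * Rough worked example: with length 5 and coord_f = 0.05, fract gives 0.05,
 * times 5 minus 0.5 gives -0.25, so ifloor/fract yield coord0 = -1 and
 * weight = 0.75 (the lerp weight toward the next texel); the select below
 * then wraps coord0 around to length - 1 = 4.
 */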
252 void
253 lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
254 LLVMValueRef coord_f,
255 LLVMValueRef length_i,
256 LLVMValueRef length_f,
257 LLVMValueRef *coord0_i,
258 LLVMValueRef *weight_f)
259 {
260 struct lp_build_context *coord_bld = &bld->coord_bld;
261 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
262 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
263 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
264 int_coord_bld->one);
265 LLVMValueRef mask;
266 /* wrap with normalized floats is just fract */
267 coord_f = lp_build_fract(coord_bld, coord_f);
268 /* mul by size and subtract 0.5 */
269 coord_f = lp_build_mul(coord_bld, coord_f, length_f);
270 coord_f = lp_build_sub(coord_bld, coord_f, half);
271 /*
272 * we avoided the 0.5/length division before the repeat wrap,
273     * so now we need to fix up the edge cases with selects
274 */
275 /* convert to int, compute lerp weight */
276 lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
277 mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
278 PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
279 *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
280 }
281
282
283 /**
284 * Build LLVM code for texture wrap mode for linear filtering.
285 * \param x0_out returns first integer texcoord
286 * \param x1_out returns second integer texcoord
287 * \param weight_out returns linear interpolation weight
288 */
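/*
 * Rough worked example for the power-of-two REPEAT case below: with an
 * 8-texel texture and coord = 0.95, 0.95 * 8 - 0.5 = 7.1, so coord0 = 7,
 * weight = 0.1 and coord1 = 8 & 7 = 0, i.e. the last texel wraps around
 * and is blended with the first one.
 */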
289 static void
290 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
291 LLVMValueRef coord,
292 LLVMValueRef length,
293 LLVMValueRef length_f,
294 LLVMValueRef offset,
295 boolean is_pot,
296 unsigned wrap_mode,
297 LLVMValueRef *x0_out,
298 LLVMValueRef *x1_out,
299 LLVMValueRef *weight_out)
300 {
301 struct lp_build_context *coord_bld = &bld->coord_bld;
302 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
303 LLVMBuilderRef builder = bld->gallivm->builder;
304 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
305 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
306 LLVMValueRef coord0, coord1, weight;
307
308 switch(wrap_mode) {
309 case PIPE_TEX_WRAP_REPEAT:
310 if (is_pot) {
311 /* mul by size and subtract 0.5 */
312 coord = lp_build_mul(coord_bld, coord, length_f);
313 coord = lp_build_sub(coord_bld, coord, half);
314 if (offset) {
315 offset = lp_build_int_to_float(coord_bld, offset);
316 coord = lp_build_add(coord_bld, coord, offset);
317 }
318 /* convert to int, compute lerp weight */
319 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
320 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
321 /* repeat wrap */
322 coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
323 coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
324 }
325 else {
326 LLVMValueRef mask;
327 if (offset) {
328 offset = lp_build_int_to_float(coord_bld, offset);
329 offset = lp_build_div(coord_bld, offset, length_f);
330 coord = lp_build_add(coord_bld, coord, offset);
331 }
332 lp_build_coord_repeat_npot_linear(bld, coord,
333 length, length_f,
334 &coord0, &weight);
335 mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
336 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
337 coord1 = LLVMBuildAnd(builder,
338 lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
339 mask, "");
340 }
341 break;
342
343 case PIPE_TEX_WRAP_CLAMP:
344 if (bld->static_sampler_state->normalized_coords) {
345 /* scale coord to length */
346 coord = lp_build_mul(coord_bld, coord, length_f);
347 }
348 if (offset) {
349 offset = lp_build_int_to_float(coord_bld, offset);
350 coord = lp_build_add(coord_bld, coord, offset);
351 }
352
353 /* clamp to [0, length] */
354 coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);
355
356 coord = lp_build_sub(coord_bld, coord, half);
357
358 /* convert to int, compute lerp weight */
359 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
360 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
361 break;
362
363 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
364 {
365 struct lp_build_context abs_coord_bld = bld->coord_bld;
366 abs_coord_bld.type.sign = FALSE;
367
368 if (bld->static_sampler_state->normalized_coords) {
369 /* mul by tex size */
370 coord = lp_build_mul(coord_bld, coord, length_f);
371 }
372 if (offset) {
373 offset = lp_build_int_to_float(coord_bld, offset);
374 coord = lp_build_add(coord_bld, coord, offset);
375 }
376
377 /* clamp to length max */
378 coord = lp_build_min(coord_bld, coord, length_f);
379 /* subtract 0.5 */
380 coord = lp_build_sub(coord_bld, coord, half);
381 /* clamp to [0, length - 0.5] */
382 coord = lp_build_max(coord_bld, coord, coord_bld->zero);
383 /* convert to int, compute lerp weight */
384 lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
385 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
386 /* coord1 = min(coord1, length-1) */
387 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
388 break;
389 }
390
391 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
392 if (bld->static_sampler_state->normalized_coords) {
393 /* scale coord to length */
394 coord = lp_build_mul(coord_bld, coord, length_f);
395 }
396 if (offset) {
397 offset = lp_build_int_to_float(coord_bld, offset);
398 coord = lp_build_add(coord_bld, coord, offset);
399 }
400 /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */
401    /* can skip the clamp (though it might not work for very large coord values) */
402 coord = lp_build_sub(coord_bld, coord, half);
403 /* convert to int, compute lerp weight */
404 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
405 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
406 break;
407
408 case PIPE_TEX_WRAP_MIRROR_REPEAT:
409 /* compute mirror function */
410 coord = lp_build_coord_mirror(bld, coord);
411
412 /* scale coord to length */
413 coord = lp_build_mul(coord_bld, coord, length_f);
414 coord = lp_build_sub(coord_bld, coord, half);
415 if (offset) {
416 offset = lp_build_int_to_float(coord_bld, offset);
417 coord = lp_build_add(coord_bld, coord, offset);
418 }
419
420 /* convert to int, compute lerp weight */
421 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
422 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
423
424 /* coord0 = max(coord0, 0) */
425 coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
426 /* coord1 = min(coord1, length-1) */
427 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
428 break;
429
430 case PIPE_TEX_WRAP_MIRROR_CLAMP:
431 if (bld->static_sampler_state->normalized_coords) {
432 /* scale coord to length */
433 coord = lp_build_mul(coord_bld, coord, length_f);
434 }
435 if (offset) {
436 offset = lp_build_int_to_float(coord_bld, offset);
437 coord = lp_build_add(coord_bld, coord, offset);
438 }
439 coord = lp_build_abs(coord_bld, coord);
440
441 /* clamp to [0, length] */
442 coord = lp_build_min(coord_bld, coord, length_f);
443
444 coord = lp_build_sub(coord_bld, coord, half);
445
446 /* convert to int, compute lerp weight */
447 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
448 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
449 break;
450
451 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
452 {
453 struct lp_build_context abs_coord_bld = bld->coord_bld;
454 abs_coord_bld.type.sign = FALSE;
455
456 if (bld->static_sampler_state->normalized_coords) {
457 /* scale coord to length */
458 coord = lp_build_mul(coord_bld, coord, length_f);
459 }
460 if (offset) {
461 offset = lp_build_int_to_float(coord_bld, offset);
462 coord = lp_build_add(coord_bld, coord, offset);
463 }
464 coord = lp_build_abs(coord_bld, coord);
465
466 /* clamp to length max */
467 coord = lp_build_min(coord_bld, coord, length_f);
468 /* subtract 0.5 */
469 coord = lp_build_sub(coord_bld, coord, half);
470 /* clamp to [0, length - 0.5] */
471 coord = lp_build_max(coord_bld, coord, coord_bld->zero);
472
473 /* convert to int, compute lerp weight */
474 lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
475 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
476 /* coord1 = min(coord1, length-1) */
477 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
478 }
479 break;
480
481 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
482 {
483 if (bld->static_sampler_state->normalized_coords) {
484 /* scale coord to length */
485 coord = lp_build_mul(coord_bld, coord, length_f);
486 }
487 if (offset) {
488 offset = lp_build_int_to_float(coord_bld, offset);
489 coord = lp_build_add(coord_bld, coord, offset);
490 }
491 coord = lp_build_abs(coord_bld, coord);
492
493 /* was: clamp to [-0.5, length + 0.5] then sub 0.5 */
494    /* skip the clamp - the value is always positive, and the other side
495       only potentially matters for very large coords */
496 coord = lp_build_sub(coord_bld, coord, half);
497
498 /* convert to int, compute lerp weight */
499 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
500 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
501 }
502 break;
503
504 default:
505 assert(0);
506 coord0 = NULL;
507 coord1 = NULL;
508 weight = NULL;
509 }
510
511 *x0_out = coord0;
512 *x1_out = coord1;
513 *weight_out = weight;
514 }
515
516
517 /**
518 * Build LLVM code for texture wrap mode for nearest filtering.
519 * \param coord the incoming texcoord (nominally in [0,1])
520 * \param length the texture size along one dimension, as int vector
521 * \param length_f the texture size along one dimension, as float vector
522 * \param offset texel offset along one dimension (as int vector)
523 * \param is_pot if TRUE, length is a power of two
524 * \param wrap_mode one of PIPE_TEX_WRAP_x
525 */
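/*
 * Rough worked examples: with an 8-texel texture, REPEAT (pot) maps
 * coord = -0.1 to ifloor(-0.1 * 8) = -1, and -1 & 7 = 7, i.e. it wraps to
 * the last texel; CLAMP_TO_EDGE maps coord = 1.2 to itrunc(1.2 * 8) = 9,
 * which is then clamped to length - 1 = 7.
 */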
526 static LLVMValueRef
527 lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
528 LLVMValueRef coord,
529 LLVMValueRef length,
530 LLVMValueRef length_f,
531 LLVMValueRef offset,
532 boolean is_pot,
533 unsigned wrap_mode)
534 {
535 struct lp_build_context *coord_bld = &bld->coord_bld;
536 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
537 LLVMBuilderRef builder = bld->gallivm->builder;
538 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
539 LLVMValueRef icoord;
540
541 switch(wrap_mode) {
542 case PIPE_TEX_WRAP_REPEAT:
543 if (is_pot) {
544 coord = lp_build_mul(coord_bld, coord, length_f);
545 icoord = lp_build_ifloor(coord_bld, coord);
546 if (offset) {
547 icoord = lp_build_add(int_coord_bld, icoord, offset);
548 }
549 icoord = LLVMBuildAnd(builder, icoord, length_minus_one, "");
550 }
551 else {
552 if (offset) {
553 offset = lp_build_int_to_float(coord_bld, offset);
554 offset = lp_build_div(coord_bld, offset, length_f);
555 coord = lp_build_add(coord_bld, coord, offset);
556 }
557 /* take fraction, unnormalize */
558 coord = lp_build_fract_safe(coord_bld, coord);
559 coord = lp_build_mul(coord_bld, coord, length_f);
560 icoord = lp_build_itrunc(coord_bld, coord);
561 }
562 break;
563
564 case PIPE_TEX_WRAP_CLAMP:
565 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
566 if (bld->static_sampler_state->normalized_coords) {
567 /* scale coord to length */
568 coord = lp_build_mul(coord_bld, coord, length_f);
569 }
570
571 /* floor */
572 /* use itrunc instead since we clamp to 0 anyway */
573 icoord = lp_build_itrunc(coord_bld, coord);
574 if (offset) {
575 icoord = lp_build_add(int_coord_bld, icoord, offset);
576 }
577
578 /* clamp to [0, length - 1]. */
579 icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
580 length_minus_one);
581 break;
582
583 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
584 if (bld->static_sampler_state->normalized_coords) {
585 /* scale coord to length */
586 coord = lp_build_mul(coord_bld, coord, length_f);
587 }
588 /* no clamp necessary, border masking will handle this */
589 icoord = lp_build_ifloor(coord_bld, coord);
590 if (offset) {
591 icoord = lp_build_add(int_coord_bld, icoord, offset);
592 }
593 break;
594
595 case PIPE_TEX_WRAP_MIRROR_REPEAT:
596 if (offset) {
597 offset = lp_build_int_to_float(coord_bld, offset);
598 offset = lp_build_div(coord_bld, offset, length_f);
599 coord = lp_build_add(coord_bld, coord, offset);
600 }
601 /* compute mirror function */
602 coord = lp_build_coord_mirror(bld, coord);
603
604 /* scale coord to length */
605 assert(bld->static_sampler_state->normalized_coords);
606 coord = lp_build_mul(coord_bld, coord, length_f);
607
608 /* itrunc == ifloor here */
609 icoord = lp_build_itrunc(coord_bld, coord);
610
611 /* clamp to [0, length - 1] */
612 icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
613 break;
614
615 case PIPE_TEX_WRAP_MIRROR_CLAMP:
616 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
617 if (bld->static_sampler_state->normalized_coords) {
618 /* scale coord to length */
619 coord = lp_build_mul(coord_bld, coord, length_f);
620 }
621 if (offset) {
622 offset = lp_build_int_to_float(coord_bld, offset);
623 coord = lp_build_add(coord_bld, coord, offset);
624 }
625 coord = lp_build_abs(coord_bld, coord);
626
627 /* itrunc == ifloor here */
628 icoord = lp_build_itrunc(coord_bld, coord);
629
630 /* clamp to [0, length - 1] */
631 icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
632 break;
633
634 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
635 if (bld->static_sampler_state->normalized_coords) {
636 /* scale coord to length */
637 coord = lp_build_mul(coord_bld, coord, length_f);
638 }
639 if (offset) {
640 offset = lp_build_int_to_float(coord_bld, offset);
641 coord = lp_build_add(coord_bld, coord, offset);
642 }
643 coord = lp_build_abs(coord_bld, coord);
644
645 /* itrunc == ifloor here */
646 icoord = lp_build_itrunc(coord_bld, coord);
647 break;
648
649 default:
650 assert(0);
651 icoord = NULL;
652 }
653
654 return icoord;
655 }
656
657
658 /**
659 * Do shadow test/comparison.
660 * \param p shadow ref value
661 * \param texel the texel to compare against
662 */
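/*
 * (In LLVM terms, an ordered comparison yields false if either operand is
 * NaN, while an unordered one yields true; that is what the d3d10 rule
 * applied below boils down to for NaN texels or ref values.)
 */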
663 static LLVMValueRef
664 lp_build_sample_comparefunc(struct lp_build_sample_context *bld,
665 LLVMValueRef p,
666 LLVMValueRef texel)
667 {
668 struct lp_build_context *texel_bld = &bld->texel_bld;
669 LLVMValueRef res;
670
671 if (0) {
672 //lp_build_print_value(bld->gallivm, "shadow cmp coord", p);
673 lp_build_print_value(bld->gallivm, "shadow cmp texel", texel);
674 }
675
676 /* result = (p FUNC texel) ? 1 : 0 */
677 /*
678 * honor d3d10 floating point rules here, which state that comparisons
679 * are ordered except NOT_EQUAL which is unordered.
680 */
681 if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) {
682 res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func,
683 p, texel);
684 }
685 else {
686 res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func,
687 p, texel);
688 }
689 return res;
690 }
691
692
693 /**
694 * Generate code to sample a mipmap level with nearest filtering.
695 * If sampling a cube texture, r = cube face in [0,5].
696 */
697 static void
698 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
699 unsigned sampler_unit,
700 LLVMValueRef size,
701 LLVMValueRef row_stride_vec,
702 LLVMValueRef img_stride_vec,
703 LLVMValueRef data_ptr,
704 LLVMValueRef mipoffsets,
705 LLVMValueRef *coords,
706 const LLVMValueRef *offsets,
707 LLVMValueRef colors_out[4])
708 {
709 const unsigned dims = bld->dims;
710 LLVMValueRef width_vec;
711 LLVMValueRef height_vec;
712 LLVMValueRef depth_vec;
713 LLVMValueRef flt_size;
714 LLVMValueRef flt_width_vec;
715 LLVMValueRef flt_height_vec;
716 LLVMValueRef flt_depth_vec;
717 LLVMValueRef x, y = NULL, z = NULL;
718
719 lp_build_extract_image_sizes(bld,
720 &bld->int_size_bld,
721 bld->int_coord_type,
722 size,
723 &width_vec, &height_vec, &depth_vec);
724
725 flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
726
727 lp_build_extract_image_sizes(bld,
728 &bld->float_size_bld,
729 bld->coord_type,
730 flt_size,
731 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
732
733 /*
734 * Compute integer texcoords.
735 */
736 x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec,
737 flt_width_vec, offsets[0],
738 bld->static_texture_state->pot_width,
739 bld->static_sampler_state->wrap_s);
740 lp_build_name(x, "tex.x.wrapped");
741
742 if (dims >= 2) {
743 y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec,
744 flt_height_vec, offsets[1],
745 bld->static_texture_state->pot_height,
746 bld->static_sampler_state->wrap_t);
747 lp_build_name(y, "tex.y.wrapped");
748
749 if (dims == 3) {
750 z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec,
751 flt_depth_vec, offsets[2],
752 bld->static_texture_state->pot_depth,
753 bld->static_sampler_state->wrap_r);
754 lp_build_name(z, "tex.z.wrapped");
755 }
756 }
757 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
758 bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
759 bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
760 z = coords[2];
761 lp_build_name(z, "tex.z.layer");
762 }
763
764 /*
765 * Get texture colors.
766 */
767 lp_build_sample_texel_soa(bld, sampler_unit,
768 width_vec, height_vec, depth_vec,
769 x, y, z,
770 row_stride_vec, img_stride_vec,
771 data_ptr, mipoffsets, colors_out);
772
773 if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
774 LLVMValueRef cmpval;
775 cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]);
776       /* this is really just (cmpval & 1.0), but llvm is clever enough to see that */
777 colors_out[0] = lp_build_select(&bld->texel_bld, cmpval,
778 bld->texel_bld.one, bld->texel_bld.zero);
779 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
780 }
781
782 }
783
784
785 /**
786 * Like a lerp, but inputs are 0/~0 masks, so can simplify slightly.
787 */
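/*
 * In other words, given that mask0/mask1 are all-zeros or all-ones lanes:
 *    masklerp(w, m0, m1) = (m0 ? (1 - w) : 0) + (m1 ? w : 0)
 * which equals lerp(w, m0 ? 1.0 : 0.0, m1 ? 1.0 : 0.0), but uses two ANDs
 * on the bit patterns of the weights instead of two multiplies.
 */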
788 static LLVMValueRef
789 lp_build_masklerp(struct lp_build_context *bld,
790 LLVMValueRef weight,
791 LLVMValueRef mask0,
792 LLVMValueRef mask1)
793 {
794 struct gallivm_state *gallivm = bld->gallivm;
795 LLVMBuilderRef builder = gallivm->builder;
796 LLVMValueRef weight2;
797
798 weight2 = lp_build_sub(bld, bld->one, weight);
799 weight = LLVMBuildBitCast(builder, weight,
800 lp_build_int_vec_type(gallivm, bld->type), "");
801 weight2 = LLVMBuildBitCast(builder, weight2,
802 lp_build_int_vec_type(gallivm, bld->type), "");
803 weight = LLVMBuildAnd(builder, weight, mask1, "");
804 weight2 = LLVMBuildAnd(builder, weight2, mask0, "");
805 weight = LLVMBuildBitCast(builder, weight, bld->vec_type, "");
806 weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, "");
807 return lp_build_add(bld, weight, weight2);
808 }
809
810 /**
811 * Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly.
812 */
813 static LLVMValueRef
814 lp_build_masklerp2d(struct lp_build_context *bld,
815 LLVMValueRef weight0,
816 LLVMValueRef weight1,
817 LLVMValueRef mask00,
818 LLVMValueRef mask01,
819 LLVMValueRef mask10,
820 LLVMValueRef mask11)
821 {
822 LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01);
823 LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11);
824 return lp_build_lerp(bld, weight1, val0, val1, 0);
825 }
826
827 /**
828 * Generate code to sample a mipmap level with linear filtering.
829 * If sampling a cube texture, r = cube face in [0,5].
830 */
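/*
 * The neighbors[][][] arrays below are indexed as neighbors[y][x][chan],
 * i.e. neighbors[0][1] holds the four channels of the texel at (x1, y0).
 */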
831 static void
832 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
833 unsigned sampler_unit,
834 LLVMValueRef size,
835 LLVMValueRef row_stride_vec,
836 LLVMValueRef img_stride_vec,
837 LLVMValueRef data_ptr,
838 LLVMValueRef mipoffsets,
839 LLVMValueRef *coords,
840 const LLVMValueRef *offsets,
841 LLVMValueRef colors_out[4])
842 {
843 const unsigned dims = bld->dims;
844 LLVMValueRef width_vec;
845 LLVMValueRef height_vec;
846 LLVMValueRef depth_vec;
847 LLVMValueRef flt_size;
848 LLVMValueRef flt_width_vec;
849 LLVMValueRef flt_height_vec;
850 LLVMValueRef flt_depth_vec;
851 LLVMValueRef x0, y0 = NULL, z0 = NULL, x1, y1 = NULL, z1 = NULL;
852 LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL;
853 LLVMValueRef neighbors[2][2][4];
854 int chan;
855
856 lp_build_extract_image_sizes(bld,
857 &bld->int_size_bld,
858 bld->int_coord_type,
859 size,
860 &width_vec, &height_vec, &depth_vec);
861
862 flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
863
864 lp_build_extract_image_sizes(bld,
865 &bld->float_size_bld,
866 bld->coord_type,
867 flt_size,
868 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
869
870 /*
871 * Compute integer texcoords.
872 */
873 lp_build_sample_wrap_linear(bld, coords[0], width_vec,
874 flt_width_vec, offsets[0],
875 bld->static_texture_state->pot_width,
876 bld->static_sampler_state->wrap_s,
877 &x0, &x1, &s_fpart);
878 lp_build_name(x0, "tex.x0.wrapped");
879 lp_build_name(x1, "tex.x1.wrapped");
880
881 if (dims >= 2) {
882 lp_build_sample_wrap_linear(bld, coords[1], height_vec,
883 flt_height_vec, offsets[1],
884 bld->static_texture_state->pot_height,
885 bld->static_sampler_state->wrap_t,
886 &y0, &y1, &t_fpart);
887 lp_build_name(y0, "tex.y0.wrapped");
888 lp_build_name(y1, "tex.y1.wrapped");
889
890 if (dims == 3) {
891 lp_build_sample_wrap_linear(bld, coords[2], depth_vec,
892 flt_depth_vec, offsets[2],
893 bld->static_texture_state->pot_depth,
894 bld->static_sampler_state->wrap_r,
895 &z0, &z1, &r_fpart);
896 lp_build_name(z0, "tex.z0.wrapped");
897 lp_build_name(z1, "tex.z1.wrapped");
898 }
899 }
900 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
901 bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
902 bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
903 z0 = z1 = coords[2]; /* cube face or layer */
904 lp_build_name(z0, "tex.z0.layer");
905 lp_build_name(z1, "tex.z1.layer");
906 }
907
908
909 /*
910 * Get texture colors.
911 */
912 /* get x0/x1 texels */
913 lp_build_sample_texel_soa(bld, sampler_unit,
914 width_vec, height_vec, depth_vec,
915 x0, y0, z0,
916 row_stride_vec, img_stride_vec,
917 data_ptr, mipoffsets, neighbors[0][0]);
918 lp_build_sample_texel_soa(bld, sampler_unit,
919 width_vec, height_vec, depth_vec,
920 x1, y0, z0,
921 row_stride_vec, img_stride_vec,
922 data_ptr, mipoffsets, neighbors[0][1]);
923
924 if (dims == 1) {
925 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
926 /* Interpolate two samples from 1D image to produce one color */
927 for (chan = 0; chan < 4; chan++) {
928 colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
929 neighbors[0][0][chan],
930 neighbors[0][1][chan],
931 0);
932 }
933 }
934 else {
935 LLVMValueRef cmpval0, cmpval1;
936 cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
937 cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
938 /* simplified lerp, AND mask with weight and add */
939 colors_out[0] = lp_build_masklerp(&bld->texel_bld, s_fpart,
940 cmpval0, cmpval1);
941 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
942 }
943 }
944 else {
945 /* 2D/3D texture */
946 LLVMValueRef colors0[4];
947
948 /* get x0/x1 texels at y1 */
949 lp_build_sample_texel_soa(bld, sampler_unit,
950 width_vec, height_vec, depth_vec,
951 x0, y1, z0,
952 row_stride_vec, img_stride_vec,
953 data_ptr, mipoffsets, neighbors[1][0]);
954 lp_build_sample_texel_soa(bld, sampler_unit,
955 width_vec, height_vec, depth_vec,
956 x1, y1, z0,
957 row_stride_vec, img_stride_vec,
958 data_ptr, mipoffsets, neighbors[1][1]);
959
960 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
961 /* Bilinear interpolate the four samples from the 2D image / 3D slice */
962 for (chan = 0; chan < 4; chan++) {
963 colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
964 s_fpart, t_fpart,
965 neighbors[0][0][chan],
966 neighbors[0][1][chan],
967 neighbors[1][0][chan],
968 neighbors[1][1][chan],
969 0);
970 }
971 }
972 else {
973 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
974 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
975 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
976 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
977 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
978 colors0[0] = lp_build_masklerp2d(&bld->texel_bld, s_fpart, t_fpart,
979 cmpval00, cmpval01, cmpval10, cmpval11);
980 colors0[1] = colors0[2] = colors0[3] = colors0[0];
981 }
982
983 if (dims == 3) {
984 LLVMValueRef neighbors1[2][2][4];
985 LLVMValueRef colors1[4];
986
987 /* get x0/x1/y0/y1 texels at z1 */
988 lp_build_sample_texel_soa(bld, sampler_unit,
989 width_vec, height_vec, depth_vec,
990 x0, y0, z1,
991 row_stride_vec, img_stride_vec,
992 data_ptr, mipoffsets, neighbors1[0][0]);
993 lp_build_sample_texel_soa(bld, sampler_unit,
994 width_vec, height_vec, depth_vec,
995 x1, y0, z1,
996 row_stride_vec, img_stride_vec,
997 data_ptr, mipoffsets, neighbors1[0][1]);
998 lp_build_sample_texel_soa(bld, sampler_unit,
999 width_vec, height_vec, depth_vec,
1000 x0, y1, z1,
1001 row_stride_vec, img_stride_vec,
1002 data_ptr, mipoffsets, neighbors1[1][0]);
1003 lp_build_sample_texel_soa(bld, sampler_unit,
1004 width_vec, height_vec, depth_vec,
1005 x1, y1, z1,
1006 row_stride_vec, img_stride_vec,
1007 data_ptr, mipoffsets, neighbors1[1][1]);
1008
1009 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1010 /* Bilinear interpolate the four samples from the second Z slice */
1011 for (chan = 0; chan < 4; chan++) {
1012 colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
1013 s_fpart, t_fpart,
1014 neighbors1[0][0][chan],
1015 neighbors1[0][1][chan],
1016 neighbors1[1][0][chan],
1017 neighbors1[1][1][chan],
1018 0);
1019 }
1020 /* Linearly interpolate the two samples from the two 3D slices */
1021 for (chan = 0; chan < 4; chan++) {
1022 colors_out[chan] = lp_build_lerp(&bld->texel_bld,
1023 r_fpart,
1024 colors0[chan], colors1[chan],
1025 0);
1026 }
1027 }
1028 else {
1029 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1030 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1031 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1032 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1033 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1034 colors1[0] = lp_build_masklerp2d(&bld->texel_bld, s_fpart, t_fpart,
1035 cmpval00, cmpval01, cmpval10, cmpval11);
1036 /* Linearly interpolate the two samples from the two 3D slices */
1037 colors_out[0] = lp_build_lerp(&bld->texel_bld,
1038 r_fpart,
1039 colors0[0], colors1[0],
1040 0);
1041 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1042 }
1043 }
1044 else {
1045 /* 2D tex */
1046 for (chan = 0; chan < 4; chan++) {
1047 colors_out[chan] = colors0[chan];
1048 }
1049 }
1050 }
1051 }
1052
1053
1054 /**
1055 * Sample the texture/mipmap using given image filter and mip filter.
1056    * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1057    * from; their sizes, strides and data pointers are looked up here.
1058 * If we're using nearest miplevel sampling the '1' values will be null/unused.
1059 */
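/*
 * Note that unlike the per-level sampling functions above, colors_out here
 * holds pointers (allocas) which the results are stored into, so that the
 * optional second-level sampling can be emitted under a conditional.
 */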
1060 static void
1061 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1062 unsigned sampler_unit,
1063 unsigned img_filter,
1064 unsigned mip_filter,
1065 LLVMValueRef *coords,
1066 const LLVMValueRef *offsets,
1067 LLVMValueRef ilevel0,
1068 LLVMValueRef ilevel1,
1069 LLVMValueRef lod_fpart,
1070 LLVMValueRef *colors_out)
1071 {
1072 LLVMBuilderRef builder = bld->gallivm->builder;
1073 LLVMValueRef size0 = NULL;
1074 LLVMValueRef size1 = NULL;
1075 LLVMValueRef row_stride0_vec = NULL;
1076 LLVMValueRef row_stride1_vec = NULL;
1077 LLVMValueRef img_stride0_vec = NULL;
1078 LLVMValueRef img_stride1_vec = NULL;
1079 LLVMValueRef data_ptr0 = NULL;
1080 LLVMValueRef data_ptr1 = NULL;
1081 LLVMValueRef mipoff0 = NULL;
1082 LLVMValueRef mipoff1 = NULL;
1083 LLVMValueRef colors0[4], colors1[4];
1084 unsigned chan;
1085
1086 /* sample the first mipmap level */
1087 lp_build_mipmap_level_sizes(bld, ilevel0,
1088 &size0,
1089 &row_stride0_vec, &img_stride0_vec);
1090 if (bld->num_lods == 1) {
1091 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1092 }
1093 else {
1094       /* This path should work for num_lods 1 too but is slightly less efficient */
1095 data_ptr0 = bld->base_ptr;
1096 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1097 }
1098 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1099 lp_build_sample_image_nearest(bld, sampler_unit,
1100 size0,
1101 row_stride0_vec, img_stride0_vec,
1102 data_ptr0, mipoff0, coords, offsets,
1103 colors0);
1104 }
1105 else {
1106 assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1107 lp_build_sample_image_linear(bld, sampler_unit,
1108 size0,
1109 row_stride0_vec, img_stride0_vec,
1110 data_ptr0, mipoff0, coords, offsets,
1111 colors0);
1112 }
1113
1114 /* Store the first level's colors in the output variables */
1115 for (chan = 0; chan < 4; chan++) {
1116 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1117 }
1118
1119 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1120 struct lp_build_if_state if_ctx;
1121 LLVMValueRef need_lerp;
1122
1123 /* need_lerp = lod_fpart > 0 */
1124 if (bld->num_lods == 1) {
1125 need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
1126 lod_fpart, bld->levelf_bld.zero,
1127 "need_lerp");
1128 }
1129 else {
1130 /*
1131 * We'll do mip filtering if any of the quads (or individual
1132        * pixels in the case of per-pixel lod) need it.
1133 * It might be better to split the vectors here and only fetch/filter
1134 * quads which need it.
1135 */
1136 /*
1137 * We unfortunately need to clamp lod_fpart here since we can get
1138 * negative values which would screw up filtering if not all
1139        * lod_fpart values have the same sign.
1140 */
1141 lod_fpart = lp_build_max(&bld->levelf_bld, lod_fpart,
1142 bld->levelf_bld.zero);
1143 need_lerp = lp_build_compare(bld->gallivm, bld->levelf_bld.type,
1144 PIPE_FUNC_GREATER,
1145 lod_fpart, bld->levelf_bld.zero);
1146 need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, need_lerp);
1147 }
1148
1149 lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1150 {
1151 /* sample the second mipmap level */
1152 lp_build_mipmap_level_sizes(bld, ilevel1,
1153 &size1,
1154 &row_stride1_vec, &img_stride1_vec);
1155 if (bld->num_lods == 1) {
1156 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1157 }
1158 else {
1159 data_ptr1 = bld->base_ptr;
1160 mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1161 }
1162 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1163 lp_build_sample_image_nearest(bld, sampler_unit,
1164 size1,
1165 row_stride1_vec, img_stride1_vec,
1166 data_ptr1, mipoff1, coords, offsets,
1167 colors1);
1168 }
1169 else {
1170 lp_build_sample_image_linear(bld, sampler_unit,
1171 size1,
1172 row_stride1_vec, img_stride1_vec,
1173 data_ptr1, mipoff1, coords, offsets,
1174 colors1);
1175 }
1176
1177 /* interpolate samples from the two mipmap levels */
1178
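         /* e.g. with per-quad lods and 8-wide vectors (num_lods == 2), each
          * per-quad lod_fpart value gets broadcast across the lanes of its
          * quad here, so the lerp below can use plain per-lane vector math.
          */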
1179 if (bld->num_lods != bld->coord_type.length)
1180 lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1181 bld->levelf_bld.type,
1182 bld->texel_bld.type,
1183 lod_fpart);
1184
1185 for (chan = 0; chan < 4; chan++) {
1186 colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1187 colors0[chan], colors1[chan],
1188 0);
1189 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1190 }
1191 }
1192 lp_build_endif(&if_ctx);
1193 }
1194 }
1195
1196
1197 /**
1198 * Build (per-coord) layer value.
1199    * Either clamp the layer to valid values, or, if out_of_bounds is non-NULL,
1200    * fill it in with an out-of-bounds mask and return the layer value unclamped.
1201 */
1202 static LLVMValueRef
1203 lp_build_layer_coord(struct lp_build_sample_context *bld,
1204 unsigned texture_unit,
1205 LLVMValueRef layer,
1206 LLVMValueRef *out_of_bounds)
1207 {
1208 LLVMValueRef num_layers;
1209 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
1210
1211 num_layers = bld->dynamic_state->depth(bld->dynamic_state,
1212 bld->gallivm, texture_unit);
1213
1214 if (out_of_bounds) {
1215 LLVMValueRef out1, out;
1216 num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers);
1217 out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero);
1218 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers);
1219 *out_of_bounds = lp_build_or(int_coord_bld, out, out1);
1220 return layer;
1221 }
1222 else {
1223 LLVMValueRef maxlayer;
1224 maxlayer = lp_build_sub(&bld->int_bld, num_layers, bld->int_bld.one);
1225 maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer);
1226 return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer);
1227 }
1228 }
1229
1230
1231 /**
1232 * Calculate cube face, lod, mip levels.
1233 */
1234 static void
1235 lp_build_sample_common(struct lp_build_sample_context *bld,
1236 unsigned texture_index,
1237 unsigned sampler_index,
1238 LLVMValueRef *coords,
1239 const struct lp_derivatives *derivs, /* optional */
1240 LLVMValueRef lod_bias, /* optional */
1241 LLVMValueRef explicit_lod, /* optional */
1242 LLVMValueRef *lod_ipart,
1243 LLVMValueRef *lod_fpart,
1244 LLVMValueRef *ilevel0,
1245 LLVMValueRef *ilevel1)
1246 {
1247 const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
1248 const unsigned min_filter = bld->static_sampler_state->min_img_filter;
1249 const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
1250 const unsigned target = bld->static_texture_state->target;
1251 LLVMValueRef first_level, cube_rho = NULL;
1252
1253 /*
1254 printf("%s mip %d min %d mag %d\n", __FUNCTION__,
1255 mip_filter, min_filter, mag_filter);
1256 */
1257
1258 /*
1259 * Choose cube face, recompute texcoords for the chosen face and
1260 * compute rho here too (as it requires transform of derivatives).
1261 */
1262 if (target == PIPE_TEXTURE_CUBE) {
1263 boolean need_derivs;
1264 need_derivs = ((min_filter != mag_filter ||
1265 mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
1266 !bld->static_sampler_state->min_max_lod_equal &&
1267 !explicit_lod);
1268 lp_build_cube_lookup(bld, coords, derivs, &cube_rho, need_derivs);
1269 }
1270 else if (target == PIPE_TEXTURE_1D_ARRAY ||
1271 target == PIPE_TEXTURE_2D_ARRAY) {
1272 coords[2] = lp_build_iround(&bld->coord_bld, coords[2]);
1273 coords[2] = lp_build_layer_coord(bld, texture_index, coords[2], NULL);
1274 }
1275
1276 if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
1277 /*
1278 * Clamp p coords to [0,1] for fixed function depth texture format here.
1279 * Technically this is not entirely correct for unorm depth as the ref value
1280 * should be converted to the depth format (quantization!) and comparison
1281    * then done in the texture format. This would actually help performance (since
1282    * we'd only need to do it once, and it would save the per-sample conversion of
1283    * texels to floats), but it would need messier code (we'd need to push at least
1284    * some bits down to the actual fetch so the conversion could be skipped, and it
1285    * would interact badly with the border color, which we'd need to convert to that
1286    * format too, or use some other tricks to make it work).
1287 */
1288 const struct util_format_description *format_desc = bld->format_desc;
1289 unsigned chan_type;
1290 /* not entirely sure we couldn't end up with non-valid swizzle here */
1291 chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
1292 format_desc->channel[format_desc->swizzle[0]].type :
1293 UTIL_FORMAT_TYPE_FLOAT;
1294 if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
1295 coords[4] = lp_build_clamp(&bld->coord_bld, coords[4],
1296 bld->coord_bld.zero, bld->coord_bld.one);
1297 }
1298 }
1299
1300 /*
1301 * Compute the level of detail (float).
1302 */
1303 if (min_filter != mag_filter ||
1304 mip_filter != PIPE_TEX_MIPFILTER_NONE) {
1305 /* Need to compute lod either to choose mipmap levels or to
1306 * distinguish between minification/magnification with one mipmap level.
1307 */
1308 lp_build_lod_selector(bld, texture_index, sampler_index,
1309 coords[0], coords[1], coords[2], cube_rho,
1310 derivs, lod_bias, explicit_lod,
1311 mip_filter,
1312 lod_ipart, lod_fpart);
1313 } else {
1314 *lod_ipart = bld->leveli_bld.zero;
1315 }
1316
1317 /*
1318 * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
1319 */
1320 switch (mip_filter) {
1321 default:
1322 assert(0 && "bad mip_filter value in lp_build_sample_soa()");
1323 /* fall-through */
1324 case PIPE_TEX_MIPFILTER_NONE:
1325 /* always use mip level 0 */
1326 if (HAVE_LLVM == 0x0207 && target == PIPE_TEXTURE_CUBE) {
1327 /* XXX this is a work-around for an apparent bug in LLVM 2.7.
1328 * We should be able to set ilevel0 = const(0) but that causes
1329 * bad x86 code to be emitted.
1330 */
1331 assert(*lod_ipart);
1332 lp_build_nearest_mip_level(bld, texture_index, *lod_ipart, ilevel0, NULL);
1333 }
1334 else {
1335 first_level = bld->dynamic_state->first_level(bld->dynamic_state,
1336 bld->gallivm, texture_index);
1337 first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
1338 *ilevel0 = first_level;
1339 }
1340 break;
1341 case PIPE_TEX_MIPFILTER_NEAREST:
1342 assert(*lod_ipart);
1343 lp_build_nearest_mip_level(bld, texture_index, *lod_ipart, ilevel0, NULL);
1344 break;
1345 case PIPE_TEX_MIPFILTER_LINEAR:
1346 assert(*lod_ipart);
1347 assert(*lod_fpart);
1348 lp_build_linear_mip_levels(bld, texture_index,
1349 *lod_ipart, lod_fpart,
1350 ilevel0, ilevel1);
1351 break;
1352 }
1353 }
1354
1355 static void
1356 lp_build_clamp_border_color(struct lp_build_sample_context *bld,
1357 unsigned sampler_unit)
1358 {
1359 struct gallivm_state *gallivm = bld->gallivm;
1360 LLVMBuilderRef builder = gallivm->builder;
1361 LLVMValueRef border_color_ptr =
1362 bld->dynamic_state->border_color(bld->dynamic_state,
1363 gallivm, sampler_unit);
1364 LLVMValueRef border_color;
1365 const struct util_format_description *format_desc = bld->format_desc;
1366 struct lp_type vec4_type = bld->texel_type;
1367 struct lp_build_context vec4_bld;
1368 LLVMValueRef min_clamp = NULL;
1369 LLVMValueRef max_clamp = NULL;
1370
1371 /*
1372    * For normalized formats we need to clamp the border color (technically
1373    * we probably should also quantize the data). It really sucks doing this
1374    * here, but it can't be avoided, at least for now, since this is part of
1375    * sampler state while the texture format is part of sampler_view state.
1376    * GL also expects clamping for uint/sint formats, so do that as well
1377    * (d3d10 can't end up here with uint/sint since it only supports
1378    * them with ld).
1379 */
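   /*
    * For instance, with R8_UNORM the border color is clamped to [0,1] below,
    * with R8_SINT to [-128,127], and with R8_UINT to [0,255]; 32-bit pure
    * integer formats need no clamp since the border color was already stored
    * at full precision.
    */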
1380 vec4_type.length = 4;
1381 lp_build_context_init(&vec4_bld, gallivm, vec4_type);
1382
1383 /*
1384 * Vectorized clamping of border color. Loading is a bit of a hack since
1385    * we just cast the pointer to the float array to a pointer to a vec4
1386 * (int or float).
1387 */
1388 border_color_ptr = lp_build_array_get_ptr(gallivm, border_color_ptr,
1389 lp_build_const_int32(gallivm, 0));
1390 border_color_ptr = LLVMBuildBitCast(builder, border_color_ptr,
1391 LLVMPointerType(vec4_bld.vec_type, 0), "");
1392 border_color = LLVMBuildLoad(builder, border_color_ptr, "");
1393 /* we don't have aligned type in the dynamic state unfortunately */
1394 lp_set_load_alignment(border_color, 4);
1395
1396 /*
1397    * Instead of having some incredibly complex logic which would try to figure out
1398    * the clamping necessary for each channel, simply use the first channel, and treat
1399 * mixed signed/unsigned normalized formats specially.
1400 * (Mixed non-normalized, which wouldn't work at all here, do not exist for a
1401 * good reason.)
1402 */
1403 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
1404 int chan;
1405       /* d/s needs special handling: when both are present it means we're just sampling depth */
1406 if (util_format_is_depth_and_stencil(format_desc->format)) {
1407 chan = format_desc->swizzle[0];
1408 }
1409 else {
1410 chan = util_format_get_first_non_void_channel(format_desc->format);
1411 }
1412 if (chan >= 0 && chan <= UTIL_FORMAT_SWIZZLE_W) {
1413 unsigned chan_type = format_desc->channel[chan].type;
1414 unsigned chan_norm = format_desc->channel[chan].normalized;
1415 unsigned chan_pure = format_desc->channel[chan].pure_integer;
1416 if (chan_type == UTIL_FORMAT_TYPE_SIGNED) {
1417 if (chan_norm) {
1418 min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
1419 max_clamp = vec4_bld.one;
1420 }
1421 else if (chan_pure) {
1422 /*
1423             * Border color was stored as int, hence we need a min/max clamp
1424             * only if the channel has fewer than 32 bits.
1425 */
1426 unsigned chan_size = format_desc->channel[chan].size;
1427 if (chan_size < 32) {
1428 min_clamp = lp_build_const_int_vec(gallivm, vec4_type,
1429 0 - (1 << (chan_size - 1)));
1430 max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
1431 (1 << (chan_size - 1)) - 1);
1432 }
1433 }
1434 /* TODO: no idea about non-pure, non-normalized! */
1435 }
1436 else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) {
1437 if (chan_norm) {
1438 min_clamp = vec4_bld.zero;
1439 max_clamp = vec4_bld.one;
1440 }
1441 /*
1442          * Need an ugly hack here: because we don't have Z32_FLOAT_X8X24,
1443          * we use Z32_FLOAT_S8X24 to imply sampling the depth component
1444 * and ignoring stencil, which will blow up here if we try to
1445 * do a uint clamp in a float texel build...
1446 * And even if we had that format, mesa st also thinks using z24s8
1447 * means depth sampling ignoring stencil.
1448 */
1449 else if (chan_pure) {
1450 /*
1451             * Border color was stored as uint, hence we never need a min
1452             * clamp, and only need a max clamp if the channel has fewer than 32 bits.
1453 */
1454 unsigned chan_size = format_desc->channel[chan].size;
1455 if (chan_size < 32) {
1456 max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
1457 (1 << chan_size) - 1);
1458 }
1459 /* TODO: no idea about non-pure, non-normalized! */
1460 }
1461 }
1462 else if (chan_type == UTIL_FORMAT_TYPE_FIXED) {
1463 /* TODO: I have no idea what clamp this would need if any! */
1464 }
1465 }
1466 /* mixed plain formats (or different pure size) */
1467 switch (format_desc->format) {
1468 case PIPE_FORMAT_B10G10R10A2_UINT:
1469 case PIPE_FORMAT_R10G10B10A2_UINT:
1470 {
1471 unsigned max10 = (1 << 10) - 1;
1472 max_clamp = lp_build_const_aos(gallivm, vec4_type, max10, max10,
1473 max10, (1 << 2) - 1, NULL);
1474 }
1475 break;
1476 case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
1477 min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
1478 -1.0F, 0.0F, NULL);
1479 max_clamp = vec4_bld.one;
1480 break;
1481 case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
1482 case PIPE_FORMAT_R5SG5SB6U_NORM:
1483 min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
1484 0.0F, 0.0F, NULL);
1485 max_clamp = vec4_bld.one;
1486 break;
1487 default:
1488 break;
1489 }
1490 }
1491 else {
1492 /* cannot figure this out from format description */
1493 if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
1494 /* s3tc formats are always unorm */
1495 min_clamp = vec4_bld.zero;
1496 max_clamp = vec4_bld.one;
1497 }
1498 else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC ||
1499 format_desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
1500 switch (format_desc->format) {
1501 case PIPE_FORMAT_RGTC1_UNORM:
1502 case PIPE_FORMAT_RGTC2_UNORM:
1503 case PIPE_FORMAT_LATC1_UNORM:
1504 case PIPE_FORMAT_LATC2_UNORM:
1505 case PIPE_FORMAT_ETC1_RGB8:
1506 min_clamp = vec4_bld.zero;
1507 max_clamp = vec4_bld.one;
1508 break;
1509 case PIPE_FORMAT_RGTC1_SNORM:
1510 case PIPE_FORMAT_RGTC2_SNORM:
1511 case PIPE_FORMAT_LATC1_SNORM:
1512 case PIPE_FORMAT_LATC2_SNORM:
1513 min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
1514 max_clamp = vec4_bld.one;
1515 break;
1516 default:
1517 assert(0);
1518 break;
1519 }
1520 }
1521 /*
1522 * all others from subsampled/other group, though we don't care
1523 * about yuv (and should not have any from zs here)
1524 */
1525 else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){
1526 switch (format_desc->format) {
1527 case PIPE_FORMAT_R8G8_B8G8_UNORM:
1528 case PIPE_FORMAT_G8R8_G8B8_UNORM:
1529 case PIPE_FORMAT_G8R8_B8R8_UNORM:
1530 case PIPE_FORMAT_R8G8_R8B8_UNORM:
1531 case PIPE_FORMAT_R1_UNORM: /* doesn't make sense but ah well */
1532 min_clamp = vec4_bld.zero;
1533 max_clamp = vec4_bld.one;
1534 break;
1535 case PIPE_FORMAT_R8G8Bx_SNORM:
1536 min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
1537 max_clamp = vec4_bld.one;
1538 break;
1539 /*
1540          * Note that smallfloat formats usually don't need clamping
1541          * (they still have infinite range); however, this is not
1542          * true for r11g11b10 and r9g9b9e5, which can't represent
1543          * negative numbers (and additionally r9g9b9e5 can't represent
1544          * very large numbers). d3d10 seems happy without clamping in
1545          * this case, but the gl spec is pretty clear: "for floating
1546 * point and integer formats, border values are clamped to
1547 * the representable range of the format" so do that here.
1548 */
1549 case PIPE_FORMAT_R11G11B10_FLOAT:
1550 min_clamp = vec4_bld.zero;
1551 break;
1552 case PIPE_FORMAT_R9G9B9E5_FLOAT:
1553 min_clamp = vec4_bld.zero;
1554 max_clamp = lp_build_const_vec(gallivm, vec4_type, MAX_RGB9E5);
1555 break;
1556 default:
1557 assert(0);
1558 break;
1559 }
1560 }
1561 }
1562
1563 if (min_clamp) {
1564 border_color = lp_build_max(&vec4_bld, border_color, min_clamp);
1565 }
1566 if (max_clamp) {
1567 border_color = lp_build_min(&vec4_bld, border_color, max_clamp);
1568 }
1569
1570 bld->border_color_clamped = border_color;
1571 }
1572
1573
1574 /**
1575 * General texture sampling codegen.
1576 * This function handles texture sampling for all texture targets (1D,
1577 * 2D, 3D, cube) and all filtering modes.
1578 */
1579 static void
1580 lp_build_sample_general(struct lp_build_sample_context *bld,
1581 unsigned sampler_unit,
1582 LLVMValueRef *coords,
1583 const LLVMValueRef *offsets,
1584 LLVMValueRef lod_ipart,
1585 LLVMValueRef lod_fpart,
1586 LLVMValueRef ilevel0,
1587 LLVMValueRef ilevel1,
1588 LLVMValueRef *colors_out)
1589 {
1590 struct lp_build_context *int_bld = &bld->int_bld;
1591 LLVMBuilderRef builder = bld->gallivm->builder;
1592 const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state;
1593 const unsigned mip_filter = sampler_state->min_mip_filter;
1594 const unsigned min_filter = sampler_state->min_img_filter;
1595 const unsigned mag_filter = sampler_state->mag_img_filter;
1596 LLVMValueRef texels[4];
1597 unsigned chan;
1598
1599 /* if we need border color, (potentially) clamp it now */
1600 if (lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_s,
1601 min_filter,
1602 mag_filter) ||
1603 (bld->dims > 1 &&
1604 lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_t,
1605 min_filter,
1606 mag_filter)) ||
1607 (bld->dims > 2 &&
1608 lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_r,
1609 min_filter,
1610 mag_filter))) {
1611 lp_build_clamp_border_color(bld, sampler_unit);
1612 }
1613
1614
1615 /*
1616 * Get/interpolate texture colors.
1617 */
1618
1619 for (chan = 0; chan < 4; ++chan) {
1620 texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, "");
1621 lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]);
1622 }
1623
1624 if (min_filter == mag_filter) {
1625 /* no need to distinguish between minification and magnification */
1626 lp_build_sample_mipmap(bld, sampler_unit,
1627 min_filter, mip_filter,
1628 coords, offsets,
1629 ilevel0, ilevel1, lod_fpart,
1630 texels);
1631 }
1632 else {
1633 /* Emit conditional to choose min image filter or mag image filter
1634       * depending on the lod being >= 0 or < 0, respectively.
1635 */
1636 struct lp_build_if_state if_ctx;
1637 LLVMValueRef minify;
1638
1639 /*
1640       * XXX this should take all lods into account; if some are min and
1641       * some are max, we could probably hack up the coords/weights in the
1642       * linear path with selects to make it work for nearest.
1643       * If that's just two quads sitting next to each other, it seems
1644       * quite ok to use the same filtering method on both though, at
1645       * least unless we have an explicit lod (and who uses different
1646       * min/mag filters with that?)
1647 */
1648 if (bld->num_lods > 1)
1649 lod_ipart = LLVMBuildExtractElement(builder, lod_ipart,
1650 lp_build_const_int32(bld->gallivm, 0), "");
1651
1652 /* minify = lod_ipart >= 0 */
1653 minify = LLVMBuildICmp(builder, LLVMIntSGE,
1654 lod_ipart, int_bld->zero, "");
1655
1656 lp_build_if(&if_ctx, bld->gallivm, minify);
1657 {
1658 /* Use the minification filter */
1659 lp_build_sample_mipmap(bld, sampler_unit,
1660 min_filter, mip_filter,
1661 coords, offsets,
1662 ilevel0, ilevel1, lod_fpart,
1663 texels);
1664 }
1665 lp_build_else(&if_ctx);
1666 {
1667 /* Use the magnification filter */
1668 lp_build_sample_mipmap(bld, sampler_unit,
1669 mag_filter, PIPE_TEX_MIPFILTER_NONE,
1670 coords, offsets,
1671 ilevel0, NULL, NULL,
1672 texels);
1673 }
1674 lp_build_endif(&if_ctx);
1675 }
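/*
 * Illustrative sketch (not generated code): per lane, the if/else emitted
 * above corresponds to the scalar logic
 *
 *    if (lod_ipart >= 0)
 *       sample_mipmap(min_filter, mip_filter, ...);              // minification
 *    else
 *       sample_mipmap(mag_filter, PIPE_TEX_MIPFILTER_NONE, ...); // magnification
 *
 * with sample_mipmap() standing in for lp_build_sample_mipmap().
 */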
1676
1677 for (chan = 0; chan < 4; ++chan) {
1678 colors_out[chan] = LLVMBuildLoad(builder, texels[chan], "");
1679 lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]);
1680 }
1681 }
1682
1683
1684 /**
1685 * Texel fetch function.
1686 * In contrast to general sampling there is no filtering and no coord minification;
1687 * the lod (if any) is always an explicit uint, and the coords are uints (in texel
1688 * units) applied directly to the selected mip level (after adding texel offsets).
1689 * This function handles texel fetch for all targets where texel fetch is supported
1690 * (no cube maps, but 1d, 2d, 3d are supported; arrays and buffers should be too).
1691 */
1692 static void
1693 lp_build_fetch_texel(struct lp_build_sample_context *bld,
1694 unsigned texture_unit,
1695 const LLVMValueRef *coords,
1696 LLVMValueRef explicit_lod,
1697 const LLVMValueRef *offsets,
1698 LLVMValueRef *colors_out)
1699 {
1700 struct lp_build_context *perquadi_bld = &bld->leveli_bld;
1701 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
1702 unsigned dims = bld->dims, chan;
1703 unsigned target = bld->static_texture_state->target;
1704 boolean out_of_bound_ret_zero = TRUE;
1705 LLVMValueRef size, ilevel;
1706 LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
1707 LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
1708 LLVMValueRef width, height, depth, i, j;
1709 LLVMValueRef offset, out_of_bounds, out1;
1710
1711 out_of_bounds = int_coord_bld->zero;
1712
1713 if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
1714 if (bld->num_lods != int_coord_bld->type.length) {
1715 ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
1716 perquadi_bld->type, explicit_lod, 0);
1717 }
1718 else {
1719 ilevel = explicit_lod;
1720 }
1721 lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel,
1722 out_of_bound_ret_zero ? &out_of_bounds : NULL);
1723 }
1724 else {
1725 assert(bld->num_lods == 1);
1726 if (bld->static_texture_state->target != PIPE_BUFFER) {
1727 ilevel = bld->dynamic_state->first_level(bld->dynamic_state,
1728 bld->gallivm, texture_unit);
1729 }
1730 else {
1731 ilevel = lp_build_const_int32(bld->gallivm, 0);
1732 }
1733 }
1734 lp_build_mipmap_level_sizes(bld, ilevel,
1735 &size,
1736 &row_stride_vec, &img_stride_vec);
1737 lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
1738 size, &width, &height, &depth);
1739
1740 if (target == PIPE_TEXTURE_1D_ARRAY ||
1741 target == PIPE_TEXTURE_2D_ARRAY) {
1742 if (out_of_bound_ret_zero) {
1743 z = lp_build_layer_coord(bld, texture_unit, z, &out1);
1744 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1745 }
1746 else {
1747 z = lp_build_layer_coord(bld, texture_unit, z, NULL);
1748 }
1749 }
1750
1751 /* This is a lot like border sampling */
1752 if (offsets[0]) {
1753 /*
1754 * coords are really unsigned, offsets are signed, but I don't think
1755 * exceeding 31 bits is possible
1756 */
1757 x = lp_build_add(int_coord_bld, x, offsets[0]);
1758 }
1759 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
1760 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1761 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
1762 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1763
1764 if (dims >= 2) {
1765 if (offsets[1]) {
1766 y = lp_build_add(int_coord_bld, y, offsets[1]);
1767 }
1768 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
1769 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1770 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
1771 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1772
1773 if (dims >= 3) {
1774 if (offsets[2]) {
1775 z = lp_build_add(int_coord_bld, z, offsets[2]);
1776 }
1777 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
1778 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1779 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
1780 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1781 }
1782 }
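/*
 * Illustrative sketch (not generated code): per lane, the accumulated
 * out_of_bounds mask built above amounts to
 *
 *    bool oob = (x < 0) || (x >= width);
 *    if (dims >= 2)
 *       oob = oob || (y < 0) || (y >= height);
 *    if (dims >= 3)
 *       oob = oob || (z < 0) || (z >= depth);
 *
 * The mask is used below both to force the fetch offset to zero (so we
 * never read outside the resource) and, if out_of_bound_ret_zero is set,
 * to replace the fetched color with zero for out-of-bounds lanes.
 */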
1783
1784 lp_build_sample_offset(int_coord_bld,
1785 bld->format_desc,
1786 x, y, z, row_stride_vec, img_stride_vec,
1787 &offset, &i, &j);
1788
1789 if (bld->static_texture_state->target != PIPE_BUFFER) {
1790 offset = lp_build_add(int_coord_bld, offset,
1791 lp_build_get_mip_offsets(bld, ilevel));
1792 }
1793
1794 offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);
1795
1796 lp_build_fetch_rgba_soa(bld->gallivm,
1797 bld->format_desc,
1798 bld->texel_type,
1799 bld->base_ptr, offset,
1800 i, j,
1801 colors_out);
1802
1803 if (out_of_bound_ret_zero) {
1804 /*
1805 * Only needed for ARB_robust_buffer_access_behavior and d3d10.
1806 * Could use min/max above instead of out-of-bounds comparisons
1807 * if we don't care about the result returned for out-of-bounds.
1808 */
1809 for (chan = 0; chan < 4; chan++) {
1810 colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
1811 bld->texel_bld.zero, colors_out[chan]);
1812 }
1813 }
1814 }
1815
1816
1817 /**
1818 * Just set texels to white instead of actually sampling the texture.
1819 * For debugging.
1820 */
1821 void
1822 lp_build_sample_nop(struct gallivm_state *gallivm,
1823 struct lp_type type,
1824 const LLVMValueRef *coords,
1825 LLVMValueRef texel_out[4])
1826 {
1827 LLVMValueRef one = lp_build_one(gallivm, type);
1828 unsigned chan;
1829
1830 for (chan = 0; chan < 4; chan++) {
1831 texel_out[chan] = one;
1832 }
1833 }
1834
1835
1836 /**
1837 * Build texture sampling code.
1838 * 'texel_out' will return a vector of four LLVMValueRefs corresponding to
1839 * R, G, B, A.
1840 * \param type vector float type to use for coords, etc.
1841 * \param is_fetch if this is a texel fetch instruction.
1842 * \param derivs partial derivatives of (s,t,r,q) with respect to x and y
1843 */
1844 void
1845 lp_build_sample_soa(struct gallivm_state *gallivm,
1846 const struct lp_static_texture_state *static_texture_state,
1847 const struct lp_static_sampler_state *static_sampler_state,
1848 struct lp_sampler_dynamic_state *dynamic_state,
1849 struct lp_type type,
1850 boolean is_fetch,
1851 unsigned texture_index,
1852 unsigned sampler_index,
1853 const LLVMValueRef *coords,
1854 const LLVMValueRef *offsets,
1855 const struct lp_derivatives *derivs, /* optional */
1856 LLVMValueRef lod_bias, /* optional */
1857 LLVMValueRef explicit_lod, /* optional */
1858 enum lp_sampler_lod_property lod_property,
1859 LLVMValueRef texel_out[4])
1860 {
1861 unsigned target = static_texture_state->target;
1862 unsigned dims = texture_dims(target);
1863 unsigned num_quads = type.length / 4;
1864 unsigned mip_filter, i;
1865 struct lp_build_sample_context bld;
1866 struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
1867 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
1868 LLVMBuilderRef builder = gallivm->builder;
1869 LLVMValueRef tex_width, newcoords[5];
1870
1871 if (0) {
1872 enum pipe_format fmt = static_texture_state->format;
1873 debug_printf("Sample from %s\n", util_format_name(fmt));
1874 }
1875
1876 assert(type.floating);
1877
1878 /* Setup our build context */
1879 memset(&bld, 0, sizeof bld);
1880 bld.gallivm = gallivm;
1881 bld.static_sampler_state = &derived_sampler_state;
1882 bld.static_texture_state = static_texture_state;
1883 bld.dynamic_state = dynamic_state;
1884 bld.format_desc = util_format_description(static_texture_state->format);
1885 bld.dims = dims;
1886
1887 bld.vector_width = lp_type_width(type);
1888
1889 bld.float_type = lp_type_float(32);
1890 bld.int_type = lp_type_int(32);
1891 bld.coord_type = type;
1892 bld.int_coord_type = lp_int_type(type);
1893 bld.float_size_in_type = lp_type_float(32);
1894 bld.float_size_in_type.length = dims > 1 ? 4 : 1;
1895 bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
1896 bld.texel_type = type;
1897
1898 /* always using the first channel should hopefully be safe;
1899 * if not, things WILL break in other places anyway.
1900 */
1901 if (bld.format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
1902 bld.format_desc->channel[0].pure_integer) {
1903 if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
1904 bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
1905 }
1906 else if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
1907 bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length);
1908 }
1909 }
1910 else if (util_format_has_stencil(bld.format_desc) &&
1911 !util_format_has_depth(bld.format_desc)) {
1912 /* for stencil only formats, sample stencil (uint) */
1913 bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
1914 }
1915
1916 if (!static_texture_state->level_zero_only) {
1917 derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
1918 } else {
1919 derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
1920 }
1921 mip_filter = derived_sampler_state.min_mip_filter;
1922
1923 if (0) {
1924 debug_printf(" .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter);
1925 }
1926
1927 /*
1928 * This is all a bit complicated; different paths are chosen for performance
1929 * reasons.
1930 * Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for
1931 * everything (the last two options are equivalent for the 4-wide case).
1932 * If there's per-quad lod but we split to 4-wide so we can use AoS, the
1933 * per-quad lod is calculated and the lod value extracted afterwards, which
1934 * makes this case basically the same as the 1-lod-for-everything case as
1935 * far as lod handling in the further sample/filter code is concerned.
1936 * Different lod handling mostly shows up when building mipmap sizes
1937 * (lp_build_mipmap_level_sizes() and friends) and also in filtering
1938 * (getting the fractional part of the lod to the right texels).
1939 */
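/*
 * Worked example (illustrative numbers): for regular sampling with
 * type.length == 8, i.e. num_quads == 2:
 *  - mipmapping on, LP_SAMPLER_LOD_PER_ELEMENT -> bld.num_lods == 8
 *  - mipmapping on, any other lod property     -> bld.num_lods == 2 (per quad)
 *  - min_mip_filter == PIPE_TEX_MIPFILTER_NONE -> bld.num_lods == 1
 * With type.length == 4 (num_quads == 1) the last two cases coincide.
 */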
1940
1941 /*
1942 * There are other situations where at least the multiple int lods could be
1943 * avoided, like min and max lod being equal.
1944 */
1945 if (explicit_lod && lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
1946 ((is_fetch && target != PIPE_BUFFER) ||
1947 (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
1948 bld.num_lods = type.length;
1949 /* TODO: for true scalar_lod should only use 1 lod value */
1950 else if ((is_fetch && explicit_lod && target != PIPE_BUFFER) ||
1951 (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
1952 bld.num_lods = num_quads;
1953 }
1954 else {
1955 bld.num_lods = 1;
1956 }
1957
1958 bld.levelf_type = type;
1959 /* we want native vector size to be able to use our intrinsics */
1960 if (bld.num_lods != type.length) {
1961 bld.levelf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
1962 }
1963 bld.leveli_type = lp_int_type(bld.levelf_type);
1964 bld.float_size_type = bld.float_size_in_type;
1965 /* Note: size vectors may not be native. They contain minified w/h/d/_ values,
1966 * with per-element lod that is w0/h0/d0/_/w1/h1/d1/_/... so up to 8x4f32 */
1967 if (bld.num_lods > 1) {
1968 bld.float_size_type.length = bld.num_lods == type.length ?
1969 bld.num_lods * bld.float_size_in_type.length :
1970 type.length;
1971 }
1972 bld.int_size_type = lp_int_type(bld.float_size_type);
1973
1974 lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
1975 lp_build_context_init(&bld.float_vec_bld, gallivm, type);
1976 lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
1977 lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
1978 lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
1979 lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
1980 lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
1981 lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
1982 lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
1983 lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
1984 lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
1985 lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
1986
1987 /* Get the dynamic state */
1988 tex_width = dynamic_state->width(dynamic_state, gallivm, texture_index);
1989 bld.row_stride_array = dynamic_state->row_stride(dynamic_state, gallivm, texture_index);
1990 bld.img_stride_array = dynamic_state->img_stride(dynamic_state, gallivm, texture_index);
1991 bld.base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm, texture_index);
1992 bld.mip_offsets = dynamic_state->mip_offsets(dynamic_state, gallivm, texture_index);
1993 /* Note that mip_offsets is an array[level] of offsets to texture images */
1994
1995 /* width, height, depth as single int vector */
1996 if (dims <= 1) {
1997 bld.int_size = tex_width;
1998 }
1999 else {
2000 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
2001 tex_width, LLVMConstInt(i32t, 0, 0), "");
2002 if (dims >= 2) {
2003 LLVMValueRef tex_height =
2004 dynamic_state->height(dynamic_state, gallivm, texture_index);
2005 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
2006 tex_height, LLVMConstInt(i32t, 1, 0), "");
2007 if (dims >= 3) {
2008 LLVMValueRef tex_depth =
2009 dynamic_state->depth(dynamic_state, gallivm, texture_index);
2010 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
2011 tex_depth, LLVMConstInt(i32t, 2, 0), "");
2012 }
2013 }
2014 }
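/*
 * At this point, for a 3D texture bld.int_size holds
 * <width, height, depth, undef> as a 4 x i32 vector; for 2D only the
 * first two elements are defined, and for 1D it is just the scalar
 * width (int_size_in_type has length 1 in that case).
 */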
2015
2016 for (i = 0; i < 5; i++) {
2017 newcoords[i] = coords[i];
2018 }
2019
2020 if (0) {
2021 /* For debug: no-op texture sampling */
2022 lp_build_sample_nop(gallivm,
2023 bld.texel_type,
2024 newcoords,
2025 texel_out);
2026 }
2027
2028 else if (is_fetch) {
2029 lp_build_fetch_texel(&bld, texture_index, newcoords,
2030 explicit_lod, offsets,
2031 texel_out);
2032 }
2033
2034 else {
2035 LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
2036 LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
2037 boolean use_aos = util_format_fits_8unorm(bld.format_desc) &&
2038 /* not sure this is strictly needed or simply impossible */
2039 static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE &&
2040 lp_is_simple_wrap_mode(static_sampler_state->wrap_s);
2041 if (dims > 1) {
2042 use_aos &= lp_is_simple_wrap_mode(static_sampler_state->wrap_t);
2043 if (dims > 2) {
2044 use_aos &= lp_is_simple_wrap_mode(static_sampler_state->wrap_r);
2045 }
2046 }
2047
2048 if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
2049 !use_aos && util_format_fits_8unorm(bld.format_desc)) {
2050 debug_printf("%s: using floating point linear filtering for %s\n",
2051 __FUNCTION__, bld.format_desc->short_name);
2052 debug_printf(" min_img %d mag_img %d mip %d wraps %d wrapt %d wrapr %d\n",
2053 static_sampler_state->min_img_filter,
2054 static_sampler_state->mag_img_filter,
2055 static_sampler_state->min_mip_filter,
2056 static_sampler_state->wrap_s,
2057 static_sampler_state->wrap_t,
2058 static_sampler_state->wrap_r);
2059 }
2060
2061 lp_build_sample_common(&bld, texture_index, sampler_index,
2062 newcoords,
2063 derivs, lod_bias, explicit_lod,
2064 &lod_ipart, &lod_fpart,
2065 &ilevel0, &ilevel1);
2066
2067 /*
2068 * we only try 8-wide sampling with soa, as it appears to
2069 * be a loss with aos on AVX (but it should work).
2070 * (It would be faster if we supported avx2.)
2071 */
2072 if (num_quads == 1 || !use_aos) {
2073
2074 if (num_quads > 1) {
2075 if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
2076 LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
2077 /*
2078 * These parameters are the same for all quads,
2079 * could probably simplify.
2080 */
2081 lod_ipart = LLVMBuildExtractElement(builder, lod_ipart, index0, "");
2082 ilevel0 = LLVMBuildExtractElement(builder, ilevel0, index0, "");
2083 }
2084 }
2085 if (use_aos) {
2086 /* do sampling/filtering with fixed pt arithmetic */
2087 lp_build_sample_aos(&bld, sampler_index,
2088 newcoords[0], newcoords[1],
2089 newcoords[2],
2090 offsets, lod_ipart, lod_fpart,
2091 ilevel0, ilevel1,
2092 texel_out);
2093 }
2094
2095 else {
2096 lp_build_sample_general(&bld, sampler_index,
2097 newcoords, offsets,
2098 lod_ipart, lod_fpart,
2099 ilevel0, ilevel1,
2100 texel_out);
2101 }
2102 }
2103 else {
2104 unsigned j;
2105 struct lp_build_sample_context bld4;
2106 struct lp_type type4 = type;
2107 unsigned i;
2108 LLVMValueRef texelout4[4];
2109 LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
2110
2111 type4.length = 4;
2112
2113 /* Setup our build context */
2114 memset(&bld4, 0, sizeof bld4);
2115 bld4.gallivm = bld.gallivm;
2116 bld4.static_texture_state = bld.static_texture_state;
2117 bld4.static_sampler_state = bld.static_sampler_state;
2118 bld4.dynamic_state = bld.dynamic_state;
2119 bld4.format_desc = bld.format_desc;
2120 bld4.dims = bld.dims;
2121 bld4.row_stride_array = bld.row_stride_array;
2122 bld4.img_stride_array = bld.img_stride_array;
2123 bld4.base_ptr = bld.base_ptr;
2124 bld4.mip_offsets = bld.mip_offsets;
2125 bld4.int_size = bld.int_size;
2126
2127 bld4.vector_width = lp_type_width(type4);
2128
2129 bld4.float_type = lp_type_float(32);
2130 bld4.int_type = lp_type_int(32);
2131 bld4.coord_type = type4;
2132 bld4.int_coord_type = lp_int_type(type4);
2133 bld4.float_size_in_type = lp_type_float(32);
2134 bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
2135 bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
2136 bld4.texel_type = bld.texel_type;
2137 bld4.texel_type.length = 4;
2142
2143 if (explicit_lod && lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
2144 ((is_fetch && target != PIPE_BUFFER) ||
2145 (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
2146 bld4.num_lods = type4.length;
2147 else
2148 bld4.num_lods = 1;
2149
2150 bld4.levelf_type = type4;
2151 /* we want native vector size to be able to use our intrinsics */
2152 if (bld4.num_lods != type4.length) {
2153 bld4.levelf_type.length = 1;
2154 }
2155 bld4.leveli_type = lp_int_type(bld4.levelf_type);
2156 bld4.float_size_type = bld4.float_size_in_type;
2157 if (bld4.num_lods > 1) {
2158 bld4.float_size_type.length = bld4.num_lods == type4.length ?
2159 bld4.num_lods * bld4.float_size_in_type.length :
2160 type4.length;
2161 }
2162 bld4.int_size_type = lp_int_type(bld4.float_size_type);
2163
2164 lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
2165 lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
2166 lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
2167 lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
2168 lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
2169 lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
2170 lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
2171 lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
2172 lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
2173 lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
2174 lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
2175 lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
2176
2177 for (i = 0; i < num_quads; i++) {
2178 LLVMValueRef s4, t4, r4;
2179 LLVMValueRef lod_ipart4, lod_fpart4 = NULL;
2180 LLVMValueRef ilevel04, ilevel14 = NULL;
2181 LLVMValueRef offsets4[4] = { NULL };
2182 unsigned num_lods = bld4.num_lods;
2183
2184 s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4);
2185 t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4);
2186 r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4);
2187
2188 if (offsets[0]) {
2189 offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4);
2190 if (dims > 1) {
2191 offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4);
2192 if (dims > 2) {
2193 offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4);
2194 }
2195 }
2196 }
2197 lod_ipart4 = lp_build_extract_range(gallivm, lod_ipart, num_lods * i, num_lods);
2198 ilevel04 = lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
2199 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
2200 ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
2201 lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
2202 }
2203
2204 if (use_aos) {
2205 /* do sampling/filtering with fixed pt arithmetic */
2206 lp_build_sample_aos(&bld4, sampler_index,
2207 s4, t4, r4, offsets4,
2208 lod_ipart4, lod_fpart4,
2209 ilevel04, ilevel14,
2210 texelout4);
2211 }
2212
2213 else {
2214 /* this path is currently unreachable and hence might break easily... */
2215 LLVMValueRef newcoords4[5];
2216 newcoords4[0] = s4;
2217 newcoords4[1] = t4;
2218 newcoords4[2] = r4;
2219 newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4);
2220 newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4);
2221
2222 lp_build_sample_general(&bld4, sampler_index,
2223 newcoords4, offsets4,
2224 lod_ipart4, lod_fpart4,
2225 ilevel04, ilevel14,
2226 texelout4);
2227 }
2228 for (j = 0; j < 4; j++) {
2229 texelouttmp[j][i] = texelout4[j];
2230 }
2231 }
2232
2233 for (j = 0; j < 4; j++) {
2234 texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
2235 }
2236 }
2237 }
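/*
 * Illustrative sketch (not generated code): the 4-wide splitting above
 * follows the pattern
 *
 *    for (i = 0; i < num_quads; i++) {
 *       // slice 4-wide pieces of coords/offsets/lods at element 4*i
 *       // sample that quad into texelout4[0..3]
 *       texelouttmp[chan][i] = texelout4[chan];
 *    }
 *    texel_out[chan] = concat(texelouttmp[chan][0..num_quads-1]);
 *
 * i.e. lp_build_extract_range() slices the wide vectors and
 * lp_build_concat() reassembles the per-quad results afterwards.
 */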
2238
2239 if (target != PIPE_BUFFER) {
2240 apply_sampler_swizzle(&bld, texel_out);
2241 }
2242
2243 /*
2244 * texel type can be a (32bit) int/uint (for pure int formats only),
2245 * however we are expected to always return floats (storage is untyped).
2246 */
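/*
 * For example, for a pure integer format such as
 * PIPE_FORMAT_R32G32B32A32_SINT the texels are built as i32 vectors;
 * the bitcast below merely reinterprets the bits as the float vector
 * type (no numeric conversion), so the consumer is expected to treat
 * the values as integers again.
 */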
2247 if (!bld.texel_type.floating) {
2248 unsigned chan;
2249 for (chan = 0; chan < 4; chan++) {
2250 texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan],
2251 lp_build_vec_type(gallivm, type), "");
2252 }
2253 }
2254 }
2255
2256 void
2257 lp_build_size_query_soa(struct gallivm_state *gallivm,
2258 const struct lp_static_texture_state *static_state,
2259 struct lp_sampler_dynamic_state *dynamic_state,
2260 struct lp_type int_type,
2261 unsigned texture_unit,
2262 unsigned target,
2263 boolean is_sviewinfo,
2264 enum lp_sampler_lod_property lod_property,
2265 LLVMValueRef explicit_lod,
2266 LLVMValueRef *sizes_out)
2267 {
2268 LLVMValueRef lod, level, size;
2269 LLVMValueRef first_level = NULL;
2270 int dims, i;
2271 boolean has_array;
2272 unsigned num_lods = 1;
2273 struct lp_build_context bld_int_vec4;
2274
2275 /*
2276 * Do some sanity verification about bound texture and shader dcl target.
2277 * Not entirely sure what's possible, but assume array/non-array are
2278 * always compatible (probably not ok for OpenGL, but d3d10 has no
2279 * distinction of arrays at the resource level).
2280 * Everything else looks bogus (though not entirely sure about rect/2d).
2281 * Currently disabled because it causes assertion failures if there's
2282 * nothing bound (or rather a dummy texture, not that this case would
2283 * return the right values).
2284 */
2285 if (0 && static_state->target != target) {
2286 if (static_state->target == PIPE_TEXTURE_1D)
2287 assert(target == PIPE_TEXTURE_1D_ARRAY);
2288 else if (static_state->target == PIPE_TEXTURE_1D_ARRAY)
2289 assert(target == PIPE_TEXTURE_1D);
2290 else if (static_state->target == PIPE_TEXTURE_2D)
2291 assert(target == PIPE_TEXTURE_2D_ARRAY);
2292 else if (static_state->target == PIPE_TEXTURE_2D_ARRAY)
2293 assert(target == PIPE_TEXTURE_2D);
2294 else if (static_state->target == PIPE_TEXTURE_CUBE)
2295 assert(target == PIPE_TEXTURE_CUBE_ARRAY);
2296 else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY)
2297 assert(target == PIPE_TEXTURE_CUBE);
2298 else
2299 assert(0);
2300 }
2301
2302 dims = texture_dims(target);
2303
2304 switch (target) {
2305 case PIPE_TEXTURE_1D_ARRAY:
2306 case PIPE_TEXTURE_2D_ARRAY:
2307 has_array = TRUE;
2308 break;
2309 default:
2310 has_array = FALSE;
2311 break;
2312 }
2313
2314 assert(!int_type.floating);
2315
2316 lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));
2317
2318 if (explicit_lod) {
2319 /* FIXME: this needs to honor per-element lod */
2320 lod = LLVMBuildExtractElement(gallivm->builder, explicit_lod, lp_build_const_int32(gallivm, 0), "");
2321 first_level = dynamic_state->first_level(dynamic_state, gallivm, texture_unit);
2322 level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level");
2323 lod = lp_build_broadcast_scalar(&bld_int_vec4, level);
2324 } else {
2325 lod = bld_int_vec4.zero;
2326 }
2327
2328 size = bld_int_vec4.undef;
2329
2330 size = LLVMBuildInsertElement(gallivm->builder, size,
2331 dynamic_state->width(dynamic_state, gallivm, texture_unit),
2332 lp_build_const_int32(gallivm, 0), "");
2333
2334 if (dims >= 2) {
2335 size = LLVMBuildInsertElement(gallivm->builder, size,
2336 dynamic_state->height(dynamic_state, gallivm, texture_unit),
2337 lp_build_const_int32(gallivm, 1), "");
2338 }
2339
2340 if (dims >= 3) {
2341 size = LLVMBuildInsertElement(gallivm->builder, size,
2342 dynamic_state->depth(dynamic_state, gallivm, texture_unit),
2343 lp_build_const_int32(gallivm, 2), "");
2344 }
2345
2346 size = lp_build_minify(&bld_int_vec4, size, lod);
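/*
 * Worked example (illustrative numbers, assuming first_level == 0):
 * lp_build_minify() essentially computes max(size >> lod, 1) per
 * component, so a 64x16 2D texture queried at lod 2 yields 16x4 and
 * at lod 5 yields 2x1.
 */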
2347
2348 if (has_array)
2349 size = LLVMBuildInsertElement(gallivm->builder, size,
2350 dynamic_state->depth(dynamic_state, gallivm, texture_unit),
2351 lp_build_const_int32(gallivm, dims), "");
2352
2353 /*
2354 * d3d10 requires zero for x/y/z values (but not w, i.e. mip levels)
2355 * if level is out of bounds (note this can't cover unbound texture
2356 * here, which also requires returning zero).
2357 */
2358 if (explicit_lod && is_sviewinfo) {
2359 LLVMValueRef last_level, out, out1;
2360 struct lp_build_context leveli_bld;
2361
2362 /* everything is scalar for now */
2363 lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32));
2364 last_level = dynamic_state->last_level(dynamic_state, gallivm, texture_unit);
2365
2366 out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level);
2367 out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level);
2368 out = lp_build_or(&leveli_bld, out, out1);
2369 if (num_lods == 1) {
2370 out = lp_build_broadcast_scalar(&bld_int_vec4, out);
2371 }
2372 else {
2373 /* TODO */
2374 assert(0);
2375 }
2376 size = lp_build_andnot(&bld_int_vec4, size, out);
2377 }
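/*
 * Illustrative sketch (not generated code): the masking above amounts to
 *
 *    if (level < first_level || level > last_level)
 *       width = height = depth = 0;   // the mip count (w) is handled below
 */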
2378 for (i = 0; i < dims + (has_array ? 1 : 0); i++) {
2379 sizes_out[i] = lp_build_extract_broadcast(gallivm, bld_int_vec4.type, int_type,
2380 size,
2381 lp_build_const_int32(gallivm, i));
2382 }
2383 if (is_sviewinfo) {
2384 for (; i < 4; i++) {
2385 sizes_out[i] = lp_build_const_vec(gallivm, int_type, 0.0);
2386 }
2387 }
2388
2389 /*
2390 * if there's no explicit_lod (buffers, rects), queries requiring the
2391 * number of mips would be illegal.
2392 */
2393 if (is_sviewinfo && explicit_lod) {
2394 struct lp_build_context bld_int_scalar;
2395 LLVMValueRef num_levels;
2396 lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32));
2397
2398 if (static_state->level_zero_only) {
2399 num_levels = bld_int_scalar.one;
2400 }
2401 else {
2402 LLVMValueRef last_level;
2403
2404 last_level = dynamic_state->last_level(dynamic_state, gallivm, texture_unit);
2405 num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level);
2406 num_levels = lp_build_add(&bld_int_scalar, num_levels, bld_int_scalar.one);
2407 }
2408 sizes_out[3] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, int_type),
2409 num_levels);
2410 }
2411 }