gallivm: handle explicit derivatives for cubemaps
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Texture sampling -- SoA.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
34 */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "pipe/p_shader_tokens.h"
39 #include "util/u_debug.h"
40 #include "util/u_dump.h"
41 #include "util/u_memory.h"
42 #include "util/u_math.h"
43 #include "util/u_format.h"
44 #include "util/u_cpu_detect.h"
45 #include "util/u_format_rgb9e5.h"
46 #include "lp_bld_debug.h"
47 #include "lp_bld_type.h"
48 #include "lp_bld_const.h"
49 #include "lp_bld_conv.h"
50 #include "lp_bld_arit.h"
51 #include "lp_bld_bitarit.h"
52 #include "lp_bld_logic.h"
53 #include "lp_bld_printf.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_gather.h"
57 #include "lp_bld_format.h"
58 #include "lp_bld_sample.h"
59 #include "lp_bld_sample_aos.h"
60 #include "lp_bld_struct.h"
61 #include "lp_bld_quad.h"
62 #include "lp_bld_pack.h"
63
64
/**
 * Generate code to fetch a texel from a texture at int coords (x, y, z).
 * The computation depends on whether the texture is 1D, 2D or 3D.
 * The result, texel, will be float vectors:
 *   texel[0] = red values
 *   texel[1] = green values
 *   texel[2] = blue values
 *   texel[3] = alpha values
 * \param width,height,depth  mip level dimensions as int vectors
 * \param x,y,z  integer texel coordinates (z may be a layer/face index)
 * \param y_stride,z_stride  row / image strides in bytes
 * \param data_ptr  pointer to the texel data
 * \param mipoffsets  per-element byte offset of the mip level, or NULL
 *                    if data_ptr already points at the level
 * \param texel_out  returns the texel as four float vectors (RGBA)
 */
static void
lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
                          LLVMValueRef width,
                          LLVMValueRef height,
                          LLVMValueRef depth,
                          LLVMValueRef x,
                          LLVMValueRef y,
                          LLVMValueRef z,
                          LLVMValueRef y_stride,
                          LLVMValueRef z_stride,
                          LLVMValueRef data_ptr,
                          LLVMValueRef mipoffsets,
                          LLVMValueRef texel_out[4])
{
   const struct lp_static_sampler_state *static_state = bld->static_sampler_state;
   const unsigned dims = bld->dims;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef offset;
   LLVMValueRef i, j;
   LLVMValueRef use_border = NULL;

   /* use_border = x < 0 || x >= width || y < 0 || y >= height */
   if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
      use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
   }

   /* also accumulate out-of-bounds mask for the t coord (2D/3D only) */
   if (dims >= 2 &&
       lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
      if (use_border) {
         use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
         use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
      }
      else {
         use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
      }
   }

   /* also accumulate out-of-bounds mask for the r coord (3D only) */
   if (dims == 3 &&
       lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
      if (use_border) {
         use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
         use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
      }
      else {
         use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
      }
   }

   /* convert x,y,z coords to linear offset from start of texture, in bytes */
   lp_build_sample_offset(&bld->int_coord_bld,
                          bld->format_desc,
                          x, y, z, y_stride, z_stride,
                          &offset, &i, &j);
   if (mipoffsets) {
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
   }

   if (use_border) {
      /* If we can sample the border color, it means that texcoords may
       * lie outside the bounds of the texture image.  We need to do
       * something to prevent reading out of bounds and causing a segfault.
       *
       * Simply AND the texture coords with !use_border.  This will cause
       * coords which are out of bounds to become zero.  Zero's guaranteed
       * to be inside the texture image.
       */
      offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
   }

   lp_build_fetch_rgba_soa(bld->gallivm,
                           bld->format_desc,
                           bld->texel_type,
                           data_ptr, offset,
                           i, j,
                           texel_out);

   /*
    * Note: if we find an app which frequently samples the texture border
    * we might want to implement a true conditional here to avoid sampling
    * the texture whenever possible (since that's quite a bit of code).
    * Ex:
    *   if (use_border) {
    *      texel = border_color;
    *   }
    *   else {
    *      texel = sample_texture(coord);
    *   }
    * As it is now, we always sample the texture, then selectively replace
    * the texel color results with the border color.
    */

   if (use_border) {
      /* select texel color or border color depending on use_border. */
      const struct util_format_description *format_desc = bld->format_desc;
      int chan;
      struct lp_type border_type = bld->texel_type;
      border_type.length = 4;
      /*
       * Only replace channels which are actually present. The others should
       * get optimized away eventually by sampler_view swizzle anyway but it's
       * easier too.
       */
      for (chan = 0; chan < 4; chan++) {
         unsigned chan_s;
         /* reverse-map channel... (chan_s ends up 4 if not found, e.g. for
          * constant 0/1 swizzles, in which case the channel is skipped) */
         for (chan_s = 0; chan_s < 4; chan_s++) {
            if (chan_s == format_desc->swizzle[chan]) {
               break;
            }
         }
         if (chan_s <= 3) {
            /* use the already clamped color */
            LLVMValueRef idx = lp_build_const_int32(bld->gallivm, chan);
            LLVMValueRef border_chan;

            border_chan = lp_build_extract_broadcast(bld->gallivm,
                                                     border_type,
                                                     bld->texel_type,
                                                     bld->border_color_clamped,
                                                     idx);
            texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
                                              border_chan, texel_out[chan]);
         }
      }
   }
}
216
217
218 /**
219 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
220 */
221 static LLVMValueRef
222 lp_build_coord_mirror(struct lp_build_sample_context *bld,
223 LLVMValueRef coord)
224 {
225 struct lp_build_context *coord_bld = &bld->coord_bld;
226 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
227 LLVMValueRef fract, flr, isOdd;
228
229 lp_build_ifloor_fract(coord_bld, coord, &flr, &fract);
230
231 /* isOdd = flr & 1 */
232 isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, int_coord_bld->one, "");
233
234 /* make coord positive or negative depending on isOdd */
235 coord = lp_build_set_sign(coord_bld, fract, isOdd);
236
237 /* convert isOdd to float */
238 isOdd = lp_build_int_to_float(coord_bld, isOdd);
239
240 /* add isOdd to coord */
241 coord = lp_build_add(coord_bld, coord, isOdd);
242
243 return coord;
244 }
245
246
247 /**
248 * Helper to compute the first coord and the weight for
249 * linear wrap repeat npot textures
250 */
251 void
252 lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
253 LLVMValueRef coord_f,
254 LLVMValueRef length_i,
255 LLVMValueRef length_f,
256 LLVMValueRef *coord0_i,
257 LLVMValueRef *weight_f)
258 {
259 struct lp_build_context *coord_bld = &bld->coord_bld;
260 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
261 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
262 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
263 int_coord_bld->one);
264 LLVMValueRef mask;
265 /* wrap with normalized floats is just fract */
266 coord_f = lp_build_fract(coord_bld, coord_f);
267 /* mul by size and subtract 0.5 */
268 coord_f = lp_build_mul(coord_bld, coord_f, length_f);
269 coord_f = lp_build_sub(coord_bld, coord_f, half);
270 /*
271 * we avoided the 0.5/length division before the repeat wrap,
272 * now need to fix up edge cases with selects
273 */
274 /* convert to int, compute lerp weight */
275 lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
276 mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
277 PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
278 *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
279 }
280
281
/**
 * Build LLVM code for texture wrap mode for linear filtering.
 * \param coord  the incoming texcoord (nominally in [0,1] when normalized)
 * \param length  the texture size along one dimension, as int vector
 * \param length_f  the texture size along one dimension, as float vector
 * \param offset  texel offset along one dimension (int vector), may be NULL
 * \param is_pot  if TRUE, length is a power of two
 * \param wrap_mode  one of PIPE_TEX_WRAP_x
 * \param x0_out returns first integer texcoord
 * \param x1_out returns second integer texcoord
 * \param weight_out returns linear interpolation weight
 */
static void
lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
                            LLVMValueRef coord,
                            LLVMValueRef length,
                            LLVMValueRef length_f,
                            LLVMValueRef offset,
                            boolean is_pot,
                            unsigned wrap_mode,
                            LLVMValueRef *x0_out,
                            LLVMValueRef *x1_out,
                            LLVMValueRef *weight_out)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
   LLVMValueRef coord0, coord1, weight;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         /* mul by size and subtract 0.5 */
         coord = lp_build_mul(coord_bld, coord, length_f);
         coord = lp_build_sub(coord_bld, coord, half);
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         /* repeat wrap (pot size: just mask with length-1) */
         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
         coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
      }
      else {
         LLVMValueRef mask;
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length_f);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         lp_build_coord_repeat_npot_linear(bld, coord,
                                           length, length_f,
                                           &coord0, &weight);
         /* zero out coord1 when coord0 was the last texel (wrap to 0) */
         mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                                 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
         coord1 = LLVMBuildAnd(builder,
                               lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
                               mask, "");
      }
      break;

   case PIPE_TEX_WRAP_CLAMP:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }

      /* clamp to [0, length] */
      coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);

      coord = lp_build_sub(coord_bld, coord, half);

      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      {
         /* coord is non-negative after the max() below, so use an
          * unsigned-typed context for the ifloor_fract */
         struct lp_build_context abs_coord_bld = bld->coord_bld;
         abs_coord_bld.type.sign = FALSE;

         if (bld->static_sampler_state->normalized_coords) {
            /* mul by tex size */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }

         /* clamp to length max */
         coord = lp_build_min(coord_bld, coord, length_f);
         /* subtract 0.5 */
         coord = lp_build_sub(coord_bld, coord, half);
         /* clamp to [0, length - 0.5] */
         coord = lp_build_max(coord_bld, coord, coord_bld->zero);
         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         /* coord1 = min(coord1, length-1) */
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
         break;
      }

   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */
      /* can skip clamp (though might not work for very large coord values) */
      coord = lp_build_sub(coord_bld, coord, half);
      /* convert to int, compute lerp weight; out-of-range coords are
       * handled later by border masking */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_MIRROR_REPEAT:
      /* compute mirror function */
      coord = lp_build_coord_mirror(bld, coord);

      /* scale coord to length */
      coord = lp_build_mul(coord_bld, coord, length_f);
      coord = lp_build_sub(coord_bld, coord, half);
      /* NOTE(review): offset is applied after mirroring here, whereas the
       * nearest path applies it before mirroring -- verify intended */
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }

      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);

      /* coord0 = max(coord0, 0) */
      coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
      /* coord1 = min(coord1, length-1) */
      coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* mirror: fold negative coords onto positive side */
      coord = lp_build_abs(coord_bld, coord);

      /* clamp to [0, length] */
      coord = lp_build_min(coord_bld, coord, length_f);

      coord = lp_build_sub(coord_bld, coord, half);

      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      {
         /* coord is non-negative after abs()/max(), so use an
          * unsigned-typed context for the ifloor_fract */
         struct lp_build_context abs_coord_bld = bld->coord_bld;
         abs_coord_bld.type.sign = FALSE;

         if (bld->static_sampler_state->normalized_coords) {
            /* scale coord to length */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         coord = lp_build_abs(coord_bld, coord);

         /* clamp to length max */
         coord = lp_build_min(coord_bld, coord, length_f);
         /* subtract 0.5 */
         coord = lp_build_sub(coord_bld, coord, half);
         /* clamp to [0, length - 0.5] */
         coord = lp_build_max(coord_bld, coord, coord_bld->zero);

         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         /* coord1 = min(coord1, length-1) */
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      {
         if (bld->static_sampler_state->normalized_coords) {
            /* scale coord to length */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         coord = lp_build_abs(coord_bld, coord);

         /* was: clamp to [-0.5, length + 0.5] then sub 0.5 */
         /* skip clamp - always positive, and other side
            only potentially matters for very large coords */
         coord = lp_build_sub(coord_bld, coord, half);

         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      }
      break;

   default:
      assert(0);
      coord0 = NULL;
      coord1 = NULL;
      weight = NULL;
   }

   *x0_out = coord0;
   *x1_out = coord1;
   *weight_out = weight;
}
514
515
/**
 * Build LLVM code for texture wrap mode for nearest filtering.
 * \param coord the incoming texcoord (nominally in [0,1])
 * \param length the texture size along one dimension, as int vector
 * \param length_f the texture size along one dimension, as float vector
 * \param offset texel offset along one dimension (as int vector), may be NULL
 * \param is_pot if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \return the wrapped integer texel coordinate
 */
static LLVMValueRef
lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
                             LLVMValueRef coord,
                             LLVMValueRef length,
                             LLVMValueRef length_f,
                             LLVMValueRef offset,
                             boolean is_pot,
                             unsigned wrap_mode)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
   LLVMValueRef icoord;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         coord = lp_build_mul(coord_bld, coord, length_f);
         icoord = lp_build_ifloor(coord_bld, coord);
         if (offset) {
            icoord = lp_build_add(int_coord_bld, icoord, offset);
         }
         /* repeat wrap (pot size: just mask with length-1) */
         icoord = LLVMBuildAnd(builder, icoord, length_minus_one, "");
      }
      else {
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length_f);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* take fraction, unnormalize */
         coord = lp_build_fract_safe(coord_bld, coord);
         coord = lp_build_mul(coord_bld, coord, length_f);
         icoord = lp_build_itrunc(coord_bld, coord);
      }
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }

      /* floor */
      /* use itrunc instead since we clamp to 0 anyway */
      icoord = lp_build_itrunc(coord_bld, coord);
      if (offset) {
         icoord = lp_build_add(int_coord_bld, icoord, offset);
      }

      /* clamp to [0, length - 1]. */
      icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
                              length_minus_one);
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      /* no clamp necessary, border masking will handle this */
      icoord = lp_build_ifloor(coord_bld, coord);
      if (offset) {
         icoord = lp_build_add(int_coord_bld, icoord, offset);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_REPEAT:
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         offset = lp_build_div(coord_bld, offset, length_f);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* compute mirror function */
      coord = lp_build_coord_mirror(bld, coord);

      /* scale coord to length */
      assert(bld->static_sampler_state->normalized_coords);
      coord = lp_build_mul(coord_bld, coord, length_f);

      /* itrunc == ifloor here (coord is non-negative after mirroring) */
      icoord = lp_build_itrunc(coord_bld, coord);

      /* clamp to [0, length - 1] */
      icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* mirror: fold negative coords onto positive side */
      coord = lp_build_abs(coord_bld, coord);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);

      /* clamp to [0, length - 1] */
      icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      coord = lp_build_abs(coord_bld, coord);

      /* itrunc == ifloor here; no clamp, border masking handles it */
      icoord = lp_build_itrunc(coord_bld, coord);
      break;

   default:
      assert(0);
      icoord = NULL;
   }

   return icoord;
}
655
656
657 /**
658 * Do shadow test/comparison.
659 * \param p shadow ref value
660 * \param texel the texel to compare against
661 */
662 static LLVMValueRef
663 lp_build_sample_comparefunc(struct lp_build_sample_context *bld,
664 LLVMValueRef p,
665 LLVMValueRef texel)
666 {
667 struct lp_build_context *texel_bld = &bld->texel_bld;
668 LLVMValueRef res;
669
670 if (0) {
671 //lp_build_print_value(bld->gallivm, "shadow cmp coord", p);
672 lp_build_print_value(bld->gallivm, "shadow cmp texel", texel);
673 }
674
675 /* result = (p FUNC texel) ? 1 : 0 */
676 /*
677 * honor d3d10 floating point rules here, which state that comparisons
678 * are ordered except NOT_EQUAL which is unordered.
679 */
680 if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) {
681 res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func,
682 p, texel);
683 }
684 else {
685 res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func,
686 p, texel);
687 }
688 return res;
689 }
690
691
/**
 * Generate code to sample a mipmap level with nearest filtering.
 * If sampling a cube texture, r = cube face in [0,5].
 * \param size  int vector holding the mip level's width/height/depth
 * \param row_stride_vec,img_stride_vec  strides in bytes, as int vectors
 * \param data_ptr  pointer to the texel data
 * \param mipoffsets  per-element mip level byte offsets, or NULL
 * \param coords  texcoords; coords[2] is the face/layer for cube/array
 *                targets, coords[4] is the shadow reference value
 * \param offsets  per-dimension texel offsets (entries may be NULL)
 * \param colors_out  returns the RGBA texel colors
 */
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                              LLVMValueRef size,
                              LLVMValueRef row_stride_vec,
                              LLVMValueRef img_stride_vec,
                              LLVMValueRef data_ptr,
                              LLVMValueRef mipoffsets,
                              LLVMValueRef *coords,
                              const LLVMValueRef *offsets,
                              LLVMValueRef colors_out[4])
{
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec;
   LLVMValueRef height_vec;
   LLVMValueRef depth_vec;
   LLVMValueRef flt_size;
   LLVMValueRef flt_width_vec;
   LLVMValueRef flt_height_vec;
   LLVMValueRef flt_depth_vec;
   LLVMValueRef x, y = NULL, z = NULL;

   /* extract int and float versions of the level dimensions */
   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                size,
                                &width_vec, &height_vec, &depth_vec);

   flt_size = lp_build_int_to_float(&bld->float_size_bld, size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &flt_width_vec, &flt_height_vec, &flt_depth_vec);

   /*
    * Compute integer texcoords.
    */
   x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec,
                                    flt_width_vec, offsets[0],
                                    bld->static_texture_state->pot_width,
                                    bld->static_sampler_state->wrap_s);
   lp_build_name(x, "tex.x.wrapped");

   if (dims >= 2) {
      y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec,
                                       flt_height_vec, offsets[1],
                                       bld->static_texture_state->pot_height,
                                       bld->static_sampler_state->wrap_t);
      lp_build_name(y, "tex.y.wrapped");

      if (dims == 3) {
         z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec,
                                          flt_depth_vec, offsets[2],
                                          bld->static_texture_state->pot_depth,
                                          bld->static_sampler_state->wrap_r);
         lp_build_name(z, "tex.z.wrapped");
      }
   }
   /* for cube maps / array textures the third coord is the face or layer
    * index and is used without wrapping */
   if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
      z = coords[2];
      lp_build_name(z, "tex.z.layer");
   }

   /*
    * Get texture colors.
    */
   lp_build_sample_texel_soa(bld,
                             width_vec, height_vec, depth_vec,
                             x, y, z,
                             row_stride_vec, img_stride_vec,
                             data_ptr, mipoffsets, colors_out);

   if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
      /* shadow comparison against the reference value in coords[4] */
      LLVMValueRef cmpval;
      cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]);
      /* this is really just a AND 1.0, cmpval but llvm is clever enough */
      colors_out[0] = lp_build_select(&bld->texel_bld, cmpval,
                                      bld->texel_bld.one, bld->texel_bld.zero);
      colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
   }

}
781
782
783 /**
784 * Like a lerp, but inputs are 0/~0 masks, so can simplify slightly.
785 */
786 static LLVMValueRef
787 lp_build_masklerp(struct lp_build_context *bld,
788 LLVMValueRef weight,
789 LLVMValueRef mask0,
790 LLVMValueRef mask1)
791 {
792 struct gallivm_state *gallivm = bld->gallivm;
793 LLVMBuilderRef builder = gallivm->builder;
794 LLVMValueRef weight2;
795
796 weight2 = lp_build_sub(bld, bld->one, weight);
797 weight = LLVMBuildBitCast(builder, weight,
798 lp_build_int_vec_type(gallivm, bld->type), "");
799 weight2 = LLVMBuildBitCast(builder, weight2,
800 lp_build_int_vec_type(gallivm, bld->type), "");
801 weight = LLVMBuildAnd(builder, weight, mask1, "");
802 weight2 = LLVMBuildAnd(builder, weight2, mask0, "");
803 weight = LLVMBuildBitCast(builder, weight, bld->vec_type, "");
804 weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, "");
805 return lp_build_add(bld, weight, weight2);
806 }
807
808 /**
809 * Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly.
810 */
811 static LLVMValueRef
812 lp_build_masklerp2d(struct lp_build_context *bld,
813 LLVMValueRef weight0,
814 LLVMValueRef weight1,
815 LLVMValueRef mask00,
816 LLVMValueRef mask01,
817 LLVMValueRef mask10,
818 LLVMValueRef mask11)
819 {
820 LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01);
821 LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11);
822 return lp_build_lerp(bld, weight1, val0, val1, 0);
823 }
824
825 /**
826 * Generate code to sample a mipmap level with linear filtering.
827 * If sampling a cube texture, r = cube face in [0,5].
828 * If linear_mask is present, only pixels having their mask set
829 * will receive linear filtering, the rest will use nearest.
830 */
831 static void
832 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
833 LLVMValueRef size,
834 LLVMValueRef linear_mask,
835 LLVMValueRef row_stride_vec,
836 LLVMValueRef img_stride_vec,
837 LLVMValueRef data_ptr,
838 LLVMValueRef mipoffsets,
839 LLVMValueRef *coords,
840 const LLVMValueRef *offsets,
841 LLVMValueRef colors_out[4])
842 {
843 const unsigned dims = bld->dims;
844 LLVMValueRef width_vec;
845 LLVMValueRef height_vec;
846 LLVMValueRef depth_vec;
847 LLVMValueRef flt_size;
848 LLVMValueRef flt_width_vec;
849 LLVMValueRef flt_height_vec;
850 LLVMValueRef flt_depth_vec;
851 LLVMValueRef x0, y0 = NULL, z0 = NULL, x1, y1 = NULL, z1 = NULL;
852 LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL;
853 LLVMValueRef neighbors[2][2][4];
854 int chan;
855
856 lp_build_extract_image_sizes(bld,
857 &bld->int_size_bld,
858 bld->int_coord_type,
859 size,
860 &width_vec, &height_vec, &depth_vec);
861
862 flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
863
864 lp_build_extract_image_sizes(bld,
865 &bld->float_size_bld,
866 bld->coord_type,
867 flt_size,
868 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
869
870 /*
871 * Compute integer texcoords.
872 */
873 lp_build_sample_wrap_linear(bld, coords[0], width_vec,
874 flt_width_vec, offsets[0],
875 bld->static_texture_state->pot_width,
876 bld->static_sampler_state->wrap_s,
877 &x0, &x1, &s_fpart);
878 lp_build_name(x0, "tex.x0.wrapped");
879 lp_build_name(x1, "tex.x1.wrapped");
880
881 if (dims >= 2) {
882 lp_build_sample_wrap_linear(bld, coords[1], height_vec,
883 flt_height_vec, offsets[1],
884 bld->static_texture_state->pot_height,
885 bld->static_sampler_state->wrap_t,
886 &y0, &y1, &t_fpart);
887 lp_build_name(y0, "tex.y0.wrapped");
888 lp_build_name(y1, "tex.y1.wrapped");
889
890 if (dims == 3) {
891 lp_build_sample_wrap_linear(bld, coords[2], depth_vec,
892 flt_depth_vec, offsets[2],
893 bld->static_texture_state->pot_depth,
894 bld->static_sampler_state->wrap_r,
895 &z0, &z1, &r_fpart);
896 lp_build_name(z0, "tex.z0.wrapped");
897 lp_build_name(z1, "tex.z1.wrapped");
898 }
899 }
900 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
901 bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
902 bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
903 z0 = z1 = coords[2]; /* cube face or layer */
904 lp_build_name(z0, "tex.z0.layer");
905 lp_build_name(z1, "tex.z1.layer");
906 }
907
908 if (linear_mask) {
909 /*
910 * Whack filter weights into place. Whatever pixel had more weight is
911 * the one which should have been selected by nearest filtering hence
912 * just use 100% weight for it.
913 */
914 struct lp_build_context *c_bld = &bld->coord_bld;
915 LLVMValueRef w1_mask, w1_weight;
916 LLVMValueRef half = lp_build_const_vec(bld->gallivm, c_bld->type, 0.5f);
917
918 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, s_fpart, half);
919 /* this select is really just a "and" */
920 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
921 s_fpart = lp_build_select(c_bld, linear_mask, s_fpart, w1_weight);
922 if (dims >= 2) {
923 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, t_fpart, half);
924 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
925 t_fpart = lp_build_select(c_bld, linear_mask, t_fpart, w1_weight);
926 if (dims == 3) {
927 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, r_fpart, half);
928 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
929 r_fpart = lp_build_select(c_bld, linear_mask, r_fpart, w1_weight);
930 }
931 }
932 }
933
934 /*
935 * Get texture colors.
936 */
937 /* get x0/x1 texels */
938 lp_build_sample_texel_soa(bld,
939 width_vec, height_vec, depth_vec,
940 x0, y0, z0,
941 row_stride_vec, img_stride_vec,
942 data_ptr, mipoffsets, neighbors[0][0]);
943 lp_build_sample_texel_soa(bld,
944 width_vec, height_vec, depth_vec,
945 x1, y0, z0,
946 row_stride_vec, img_stride_vec,
947 data_ptr, mipoffsets, neighbors[0][1]);
948
949 if (dims == 1) {
950 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
951 /* Interpolate two samples from 1D image to produce one color */
952 for (chan = 0; chan < 4; chan++) {
953 colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
954 neighbors[0][0][chan],
955 neighbors[0][1][chan],
956 0);
957 }
958 }
959 else {
960 LLVMValueRef cmpval0, cmpval1;
961 cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
962 cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
963 /* simplified lerp, AND mask with weight and add */
964 colors_out[0] = lp_build_masklerp(&bld->texel_bld, s_fpart,
965 cmpval0, cmpval1);
966 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
967 }
968 }
969 else {
970 /* 2D/3D texture */
971 LLVMValueRef colors0[4];
972
973 /* get x0/x1 texels at y1 */
974 lp_build_sample_texel_soa(bld,
975 width_vec, height_vec, depth_vec,
976 x0, y1, z0,
977 row_stride_vec, img_stride_vec,
978 data_ptr, mipoffsets, neighbors[1][0]);
979 lp_build_sample_texel_soa(bld,
980 width_vec, height_vec, depth_vec,
981 x1, y1, z0,
982 row_stride_vec, img_stride_vec,
983 data_ptr, mipoffsets, neighbors[1][1]);
984
985 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
986 /* Bilinear interpolate the four samples from the 2D image / 3D slice */
987 for (chan = 0; chan < 4; chan++) {
988 colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
989 s_fpart, t_fpart,
990 neighbors[0][0][chan],
991 neighbors[0][1][chan],
992 neighbors[1][0][chan],
993 neighbors[1][1][chan],
994 0);
995 }
996 }
997 else {
998 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
999 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1000 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1001 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1002 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1003 colors0[0] = lp_build_masklerp2d(&bld->texel_bld, s_fpart, t_fpart,
1004 cmpval00, cmpval01, cmpval10, cmpval11);
1005 colors0[1] = colors0[2] = colors0[3] = colors0[0];
1006 }
1007
1008 if (dims == 3) {
1009 LLVMValueRef neighbors1[2][2][4];
1010 LLVMValueRef colors1[4];
1011
1012 /* get x0/x1/y0/y1 texels at z1 */
1013 lp_build_sample_texel_soa(bld,
1014 width_vec, height_vec, depth_vec,
1015 x0, y0, z1,
1016 row_stride_vec, img_stride_vec,
1017 data_ptr, mipoffsets, neighbors1[0][0]);
1018 lp_build_sample_texel_soa(bld,
1019 width_vec, height_vec, depth_vec,
1020 x1, y0, z1,
1021 row_stride_vec, img_stride_vec,
1022 data_ptr, mipoffsets, neighbors1[0][1]);
1023 lp_build_sample_texel_soa(bld,
1024 width_vec, height_vec, depth_vec,
1025 x0, y1, z1,
1026 row_stride_vec, img_stride_vec,
1027 data_ptr, mipoffsets, neighbors1[1][0]);
1028 lp_build_sample_texel_soa(bld,
1029 width_vec, height_vec, depth_vec,
1030 x1, y1, z1,
1031 row_stride_vec, img_stride_vec,
1032 data_ptr, mipoffsets, neighbors1[1][1]);
1033
1034 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1035 /* Bilinear interpolate the four samples from the second Z slice */
1036 for (chan = 0; chan < 4; chan++) {
1037 colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
1038 s_fpart, t_fpart,
1039 neighbors1[0][0][chan],
1040 neighbors1[0][1][chan],
1041 neighbors1[1][0][chan],
1042 neighbors1[1][1][chan],
1043 0);
1044 }
1045 /* Linearly interpolate the two samples from the two 3D slices */
1046 for (chan = 0; chan < 4; chan++) {
1047 colors_out[chan] = lp_build_lerp(&bld->texel_bld,
1048 r_fpart,
1049 colors0[chan], colors1[chan],
1050 0);
1051 }
1052 }
1053 else {
1054 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1055 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1056 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1057 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1058 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1059 colors1[0] = lp_build_masklerp2d(&bld->texel_bld, s_fpart, t_fpart,
1060 cmpval00, cmpval01, cmpval10, cmpval11);
1061 /* Linearly interpolate the two samples from the two 3D slices */
1062 colors_out[0] = lp_build_lerp(&bld->texel_bld,
1063 r_fpart,
1064 colors0[0], colors1[0],
1065 0);
1066 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1067 }
1068 }
1069 else {
1070 /* 2D tex */
1071 for (chan = 0; chan < 4; chan++) {
1072 colors_out[chan] = colors0[chan];
1073 }
1074 }
1075 }
1076 }
1077
1078
1079 /**
1080 * Sample the texture/mipmap using given image filter and mip filter.
1081 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1082 * from (vectors or scalars).
1083 * If we're using nearest miplevel sampling the '1' values will be null/unused.
1084 */
1085 static void
1086 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1087 unsigned img_filter,
1088 unsigned mip_filter,
1089 LLVMValueRef *coords,
1090 const LLVMValueRef *offsets,
1091 LLVMValueRef ilevel0,
1092 LLVMValueRef ilevel1,
1093 LLVMValueRef lod_fpart,
1094 LLVMValueRef *colors_out)
1095 {
1096 LLVMBuilderRef builder = bld->gallivm->builder;
1097 LLVMValueRef size0 = NULL;
1098 LLVMValueRef size1 = NULL;
1099 LLVMValueRef row_stride0_vec = NULL;
1100 LLVMValueRef row_stride1_vec = NULL;
1101 LLVMValueRef img_stride0_vec = NULL;
1102 LLVMValueRef img_stride1_vec = NULL;
1103 LLVMValueRef data_ptr0 = NULL;
1104 LLVMValueRef data_ptr1 = NULL;
1105 LLVMValueRef mipoff0 = NULL;
1106 LLVMValueRef mipoff1 = NULL;
1107 LLVMValueRef colors0[4], colors1[4];
1108 unsigned chan;
1109
1110 /* sample the first mipmap level */
1111 lp_build_mipmap_level_sizes(bld, ilevel0,
1112 &size0,
1113 &row_stride0_vec, &img_stride0_vec);
1114 if (bld->num_mips == 1) {
1115 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1116 }
1117 else {
1118 /* This path should work for num_lods 1 too but slightly less efficient */
1119 data_ptr0 = bld->base_ptr;
1120 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1121 }
1122 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1123 lp_build_sample_image_nearest(bld, size0,
1124 row_stride0_vec, img_stride0_vec,
1125 data_ptr0, mipoff0, coords, offsets,
1126 colors0);
1127 }
1128 else {
1129 assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1130 lp_build_sample_image_linear(bld, size0, NULL,
1131 row_stride0_vec, img_stride0_vec,
1132 data_ptr0, mipoff0, coords, offsets,
1133 colors0);
1134 }
1135
1136 /* Store the first level's colors in the output variables */
1137 for (chan = 0; chan < 4; chan++) {
1138 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1139 }
1140
1141 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1142 struct lp_build_if_state if_ctx;
1143 LLVMValueRef need_lerp;
1144
1145 /* need_lerp = lod_fpart > 0 */
1146 if (bld->num_lods == 1) {
1147 need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
1148 lod_fpart, bld->lodf_bld.zero,
1149 "need_lerp");
1150 }
1151 else {
1152 /*
1153 * We'll do mip filtering if any of the quads (or individual
1154 * pixel in case of per-pixel lod) need it.
1155 * It might be better to split the vectors here and only fetch/filter
1156 * quads which need it (if there's one lod per quad).
1157 */
1158 need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
1159 PIPE_FUNC_GREATER,
1160 lod_fpart, bld->lodf_bld.zero);
1161 need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp);
1162 }
1163
1164 lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1165 {
1166 /*
1167 * We unfortunately need to clamp lod_fpart here since we can get
1168 * negative values which would screw up filtering if not all
1169 * lod_fpart values have same sign.
1170 */
1171 lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
1172 bld->lodf_bld.zero);
1173 /* sample the second mipmap level */
1174 lp_build_mipmap_level_sizes(bld, ilevel1,
1175 &size1,
1176 &row_stride1_vec, &img_stride1_vec);
1177 if (bld->num_mips == 1) {
1178 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1179 }
1180 else {
1181 data_ptr1 = bld->base_ptr;
1182 mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1183 }
1184 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1185 lp_build_sample_image_nearest(bld, size1,
1186 row_stride1_vec, img_stride1_vec,
1187 data_ptr1, mipoff1, coords, offsets,
1188 colors1);
1189 }
1190 else {
1191 lp_build_sample_image_linear(bld, size1, NULL,
1192 row_stride1_vec, img_stride1_vec,
1193 data_ptr1, mipoff1, coords, offsets,
1194 colors1);
1195 }
1196
1197 /* interpolate samples from the two mipmap levels */
1198
1199 if (bld->num_lods != bld->coord_type.length)
1200 lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1201 bld->lodf_bld.type,
1202 bld->texel_bld.type,
1203 lod_fpart);
1204
1205 for (chan = 0; chan < 4; chan++) {
1206 colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1207 colors0[chan], colors1[chan],
1208 0);
1209 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1210 }
1211 }
1212 lp_build_endif(&if_ctx);
1213 }
1214 }
1215
1216
1217 /**
1218 * Sample the texture/mipmap using given mip filter, and using
1219 * both nearest and linear filtering at the same time depending
1220 * on linear_mask.
1221 * lod can be per quad but linear_mask is always per pixel.
1222 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1223 * from (vectors or scalars).
1224 * If we're using nearest miplevel sampling the '1' values will be null/unused.
1225 */
1226 static void
1227 lp_build_sample_mipmap_both(struct lp_build_sample_context *bld,
1228 LLVMValueRef linear_mask,
1229 unsigned mip_filter,
1230 LLVMValueRef *coords,
1231 const LLVMValueRef *offsets,
1232 LLVMValueRef ilevel0,
1233 LLVMValueRef ilevel1,
1234 LLVMValueRef lod_fpart,
1235 LLVMValueRef lod_positive,
1236 LLVMValueRef *colors_out)
1237 {
1238 LLVMBuilderRef builder = bld->gallivm->builder;
1239 LLVMValueRef size0 = NULL;
1240 LLVMValueRef size1 = NULL;
1241 LLVMValueRef row_stride0_vec = NULL;
1242 LLVMValueRef row_stride1_vec = NULL;
1243 LLVMValueRef img_stride0_vec = NULL;
1244 LLVMValueRef img_stride1_vec = NULL;
1245 LLVMValueRef data_ptr0 = NULL;
1246 LLVMValueRef data_ptr1 = NULL;
1247 LLVMValueRef mipoff0 = NULL;
1248 LLVMValueRef mipoff1 = NULL;
1249 LLVMValueRef colors0[4], colors1[4];
1250 unsigned chan;
1251
1252 /* sample the first mipmap level */
1253 lp_build_mipmap_level_sizes(bld, ilevel0,
1254 &size0,
1255 &row_stride0_vec, &img_stride0_vec);
1256 if (bld->num_mips == 1) {
1257 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1258 }
1259 else {
1260 /* This path should work for num_lods 1 too but slightly less efficient */
1261 data_ptr0 = bld->base_ptr;
1262 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1263 }
1264
1265 lp_build_sample_image_linear(bld, size0, linear_mask,
1266 row_stride0_vec, img_stride0_vec,
1267 data_ptr0, mipoff0, coords, offsets,
1268 colors0);
1269
1270 /* Store the first level's colors in the output variables */
1271 for (chan = 0; chan < 4; chan++) {
1272 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1273 }
1274
1275 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1276 struct lp_build_if_state if_ctx;
1277 LLVMValueRef need_lerp;
1278
1279 /*
1280 * We'll do mip filtering if any of the quads (or individual
1281 * pixel in case of per-pixel lod) need it.
1282 * Note using lod_positive here not lod_fpart since it may be the same
1283 * condition as that used in the outer "if" in the caller hence llvm
1284 * should be able to merge the branches in this case.
1285 */
1286 need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_positive);
1287
1288 lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1289 {
1290 /*
1291 * We unfortunately need to clamp lod_fpart here since we can get
1292 * negative values which would screw up filtering if not all
1293 * lod_fpart values have same sign.
1294 */
1295 lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
1296 bld->lodf_bld.zero);
1297 /* sample the second mipmap level */
1298 lp_build_mipmap_level_sizes(bld, ilevel1,
1299 &size1,
1300 &row_stride1_vec, &img_stride1_vec);
1301 if (bld->num_mips == 1) {
1302 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1303 }
1304 else {
1305 data_ptr1 = bld->base_ptr;
1306 mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1307 }
1308
1309 lp_build_sample_image_linear(bld, size1, linear_mask,
1310 row_stride1_vec, img_stride1_vec,
1311 data_ptr1, mipoff1, coords, offsets,
1312 colors1);
1313
1314 /* interpolate samples from the two mipmap levels */
1315
1316 if (bld->num_lods != bld->coord_type.length)
1317 lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1318 bld->lodf_bld.type,
1319 bld->texel_bld.type,
1320 lod_fpart);
1321
1322 for (chan = 0; chan < 4; chan++) {
1323 colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1324 colors0[chan], colors1[chan],
1325 0);
1326 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1327 }
1328 }
1329 lp_build_endif(&if_ctx);
1330 }
1331 }
1332
1333
1334 /**
1335 * Build (per-coord) layer value.
1336 * Either clamp layer to valid values or fill in optional out_of_bounds
1337 * value and just return value unclamped.
1338 */
1339 static LLVMValueRef
1340 lp_build_layer_coord(struct lp_build_sample_context *bld,
1341 unsigned texture_unit,
1342 LLVMValueRef layer,
1343 LLVMValueRef *out_of_bounds)
1344 {
1345 LLVMValueRef num_layers;
1346 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
1347
1348 num_layers = bld->dynamic_state->depth(bld->dynamic_state,
1349 bld->gallivm, texture_unit);
1350
1351 if (out_of_bounds) {
1352 LLVMValueRef out1, out;
1353 num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers);
1354 out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero);
1355 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers);
1356 *out_of_bounds = lp_build_or(int_coord_bld, out, out1);
1357 return layer;
1358 }
1359 else {
1360 LLVMValueRef maxlayer;
1361 maxlayer = lp_build_sub(&bld->int_bld, num_layers, bld->int_bld.one);
1362 maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer);
1363 return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer);
1364 }
1365 }
1366
1367
1368 /**
1369 * Calculate cube face, lod, mip levels.
1370 */
1371 static void
1372 lp_build_sample_common(struct lp_build_sample_context *bld,
1373 unsigned texture_index,
1374 unsigned sampler_index,
1375 LLVMValueRef *coords,
1376 const struct lp_derivatives *derivs, /* optional */
1377 LLVMValueRef lod_bias, /* optional */
1378 LLVMValueRef explicit_lod, /* optional */
1379 LLVMValueRef *lod_pos_or_zero,
1380 LLVMValueRef *lod_fpart,
1381 LLVMValueRef *ilevel0,
1382 LLVMValueRef *ilevel1)
1383 {
1384 const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
1385 const unsigned min_filter = bld->static_sampler_state->min_img_filter;
1386 const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
1387 const unsigned target = bld->static_texture_state->target;
1388 LLVMValueRef first_level, cube_rho = NULL;
1389 LLVMValueRef lod_ipart = NULL;
1390 struct lp_derivatives cube_derivs;
1391
1392 /*
1393 printf("%s mip %d min %d mag %d\n", __FUNCTION__,
1394 mip_filter, min_filter, mag_filter);
1395 */
1396
1397 /*
1398 * Choose cube face, recompute texcoords for the chosen face and
1399 * compute rho here too (as it requires transform of derivatives).
1400 */
1401 if (target == PIPE_TEXTURE_CUBE) {
1402 boolean need_derivs;
1403 need_derivs = ((min_filter != mag_filter ||
1404 mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
1405 !bld->static_sampler_state->min_max_lod_equal &&
1406 !explicit_lod);
1407 lp_build_cube_lookup(bld, coords, derivs, &cube_rho, &cube_derivs, need_derivs);
1408 derivs = &cube_derivs;
1409 }
1410 else if (target == PIPE_TEXTURE_1D_ARRAY ||
1411 target == PIPE_TEXTURE_2D_ARRAY) {
1412 coords[2] = lp_build_iround(&bld->coord_bld, coords[2]);
1413 coords[2] = lp_build_layer_coord(bld, texture_index, coords[2], NULL);
1414 }
1415
1416 if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
1417 /*
1418 * Clamp p coords to [0,1] for fixed function depth texture format here.
1419 * Technically this is not entirely correct for unorm depth as the ref value
1420 * should be converted to the depth format (quantization!) and comparison
1421 * then done in texture format. This would actually help performance (since
1422 * only need to do it once and could save the per-sample conversion of texels
1423 * to floats instead), but it would need more messy code (would need to push
1424 * at least some bits down to actual fetch so conversion could be skipped,
1425 * and would have ugly interaction with border color, would need to convert
1426 * border color to that format too or do some other tricks to make it work).
1427 */
1428 const struct util_format_description *format_desc = bld->format_desc;
1429 unsigned chan_type;
1430 /* not entirely sure we couldn't end up with non-valid swizzle here */
1431 chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
1432 format_desc->channel[format_desc->swizzle[0]].type :
1433 UTIL_FORMAT_TYPE_FLOAT;
1434 if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
1435 coords[4] = lp_build_clamp(&bld->coord_bld, coords[4],
1436 bld->coord_bld.zero, bld->coord_bld.one);
1437 }
1438 }
1439
1440 /*
1441 * Compute the level of detail (float).
1442 */
1443 if (min_filter != mag_filter ||
1444 mip_filter != PIPE_TEX_MIPFILTER_NONE) {
1445 /* Need to compute lod either to choose mipmap levels or to
1446 * distinguish between minification/magnification with one mipmap level.
1447 */
1448 lp_build_lod_selector(bld, texture_index, sampler_index,
1449 coords[0], coords[1], coords[2], cube_rho,
1450 derivs, lod_bias, explicit_lod,
1451 mip_filter,
1452 &lod_ipart, lod_fpart, lod_pos_or_zero);
1453 } else {
1454 lod_ipart = bld->lodi_bld.zero;
1455 *lod_pos_or_zero = bld->lodi_bld.zero;
1456 }
1457
1458 if (bld->num_lods != bld->num_mips) {
1459 /* only makes sense if there's just a single mip level */
1460 assert(bld->num_mips == 1);
1461 lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
1462 }
1463
1464 /*
1465 * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
1466 */
1467 switch (mip_filter) {
1468 default:
1469 assert(0 && "bad mip_filter value in lp_build_sample_soa()");
1470 /* fall-through */
1471 case PIPE_TEX_MIPFILTER_NONE:
1472 /* always use mip level 0 */
1473 if (HAVE_LLVM == 0x0207 && target == PIPE_TEXTURE_CUBE) {
1474 /* XXX this is a work-around for an apparent bug in LLVM 2.7.
1475 * We should be able to set ilevel0 = const(0) but that causes
1476 * bad x86 code to be emitted.
1477 */
1478 assert(lod_ipart);
1479 lp_build_nearest_mip_level(bld, texture_index, lod_ipart, ilevel0, NULL);
1480 }
1481 else {
1482 first_level = bld->dynamic_state->first_level(bld->dynamic_state,
1483 bld->gallivm, texture_index);
1484 first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
1485 *ilevel0 = first_level;
1486 }
1487 break;
1488 case PIPE_TEX_MIPFILTER_NEAREST:
1489 assert(lod_ipart);
1490 lp_build_nearest_mip_level(bld, texture_index, lod_ipart, ilevel0, NULL);
1491 break;
1492 case PIPE_TEX_MIPFILTER_LINEAR:
1493 assert(lod_ipart);
1494 assert(*lod_fpart);
1495 lp_build_linear_mip_levels(bld, texture_index,
1496 lod_ipart, lod_fpart,
1497 ilevel0, ilevel1);
1498 break;
1499 }
1500 }
1501
/**
 * Load the sampler's border color and clamp it to the representable
 * range of the texture format, storing the result in
 * bld->border_color_clamped.
 * The clamp range is derived from the first (non-void) channel's type;
 * formats mixing differently-typed channels are special-cased below.
 */
static void
lp_build_clamp_border_color(struct lp_build_sample_context *bld,
                            unsigned sampler_unit)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef border_color_ptr =
      bld->dynamic_state->border_color(bld->dynamic_state,
                                       gallivm, sampler_unit);
   LLVMValueRef border_color;
   const struct util_format_description *format_desc = bld->format_desc;
   struct lp_type vec4_type = bld->texel_type;
   struct lp_build_context vec4_bld;
   LLVMValueRef min_clamp = NULL;
   LLVMValueRef max_clamp = NULL;

   /*
    * For normalized format need to clamp border color (technically
    * probably should also quantize the data). Really sucks doing this
    * here but can't avoid at least for now since this is part of
    * sampler state and texture format is part of sampler_view state.
    * GL also expects clamping for uint/sint formats too so
    * do that as well (d3d10 can't end up here with uint/sint since it
    * only supports them with ld).
    */
   vec4_type.length = 4;
   lp_build_context_init(&vec4_bld, gallivm, vec4_type);

   /*
    * Vectorized clamping of border color. Loading is a bit of a hack since
    * we just cast the pointer to float array to pointer to vec4
    * (int or float).
    */
   border_color_ptr = lp_build_array_get_ptr(gallivm, border_color_ptr,
                                             lp_build_const_int32(gallivm, 0));
   border_color_ptr = LLVMBuildBitCast(builder, border_color_ptr,
                                       LLVMPointerType(vec4_bld.vec_type, 0), "");
   border_color = LLVMBuildLoad(builder, border_color_ptr, "");
   /* we don't have aligned type in the dynamic state unfortunately */
   lp_set_load_alignment(border_color, 4);

   /*
    * Instead of having some incredibly complex logic which will try to figure out
    * clamping necessary for each channel, simply use the first channel, and treat
    * mixed signed/unsigned normalized formats specially.
    * (Mixed non-normalized, which wouldn't work at all here, do not exist for a
    * good reason.)
    */
   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
      int chan;
      /* d/s needs special handling because both present means just sampling depth */
      if (util_format_is_depth_and_stencil(format_desc->format)) {
         chan = format_desc->swizzle[0];
      }
      else {
         chan = util_format_get_first_non_void_channel(format_desc->format);
      }
      if (chan >= 0 && chan <= UTIL_FORMAT_SWIZZLE_W) {
         unsigned chan_type = format_desc->channel[chan].type;
         unsigned chan_norm = format_desc->channel[chan].normalized;
         unsigned chan_pure = format_desc->channel[chan].pure_integer;
         if (chan_type == UTIL_FORMAT_TYPE_SIGNED) {
            if (chan_norm) {
               /* snorm range is [-1,1] */
               min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
               max_clamp = vec4_bld.one;
            }
            else if (chan_pure) {
               /*
                * Border color was stored as int, hence need min/max clamp
                * only if chan has less than 32 bits..
                */
               unsigned chan_size = format_desc->channel[chan].size;
               if (chan_size < 32) {
                  min_clamp = lp_build_const_int_vec(gallivm, vec4_type,
                                                     0 - (1 << (chan_size - 1)));
                  max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
                                                     (1 << (chan_size - 1)) - 1);
               }
            }
            /* TODO: no idea about non-pure, non-normalized! */
         }
         else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) {
            if (chan_norm) {
               /* unorm range is [0,1] */
               min_clamp = vec4_bld.zero;
               max_clamp = vec4_bld.one;
            }
            /*
             * Need a ugly hack here, because we don't have Z32_FLOAT_X8X24
             * we use Z32_FLOAT_S8X24 to imply sampling depth component
             * and ignoring stencil, which will blow up here if we try to
             * do a uint clamp in a float texel build...
             * And even if we had that format, mesa st also thinks using z24s8
             * means depth sampling ignoring stencil.
             */
            else if (chan_pure) {
               /*
                * Border color was stored as uint, hence never need min
                * clamp, and only need max clamp if chan has less than 32 bits.
                */
               unsigned chan_size = format_desc->channel[chan].size;
               if (chan_size < 32) {
                  max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
                                                     (1 << chan_size) - 1);
               }
               /* TODO: no idea about non-pure, non-normalized! */
            }
         }
         else if (chan_type == UTIL_FORMAT_TYPE_FIXED) {
            /* TODO: I have no idea what clamp this would need if any! */
         }
      }
      /* mixed plain formats (or different pure size) */
      switch (format_desc->format) {
      case PIPE_FORMAT_B10G10R10A2_UINT:
      case PIPE_FORMAT_R10G10B10A2_UINT:
      {
         /* 10-bit rgb channels, 2-bit alpha */
         unsigned max10 = (1 << 10) - 1;
         max_clamp = lp_build_const_aos(gallivm, vec4_type, max10, max10,
                                        max10, (1 << 2) - 1, NULL);
      }
         break;
      case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
         /* snorm rgb, unorm alpha */
         min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
                                        -1.0F, 0.0F, NULL);
         max_clamp = vec4_bld.one;
         break;
      case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
      case PIPE_FORMAT_R5SG5SB6U_NORM:
         /* snorm rg, unorm b (a unused/padding) */
         min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
                                        0.0F, 0.0F, NULL);
         max_clamp = vec4_bld.one;
         break;
      default:
         break;
      }
   }
   else {
      /* cannot figure this out from format description */
      if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
         /* s3tc formats are always unorm */
         min_clamp = vec4_bld.zero;
         max_clamp = vec4_bld.one;
      }
      else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC ||
               format_desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
         switch (format_desc->format) {
         case PIPE_FORMAT_RGTC1_UNORM:
         case PIPE_FORMAT_RGTC2_UNORM:
         case PIPE_FORMAT_LATC1_UNORM:
         case PIPE_FORMAT_LATC2_UNORM:
         case PIPE_FORMAT_ETC1_RGB8:
            min_clamp = vec4_bld.zero;
            max_clamp = vec4_bld.one;
            break;
         case PIPE_FORMAT_RGTC1_SNORM:
         case PIPE_FORMAT_RGTC2_SNORM:
         case PIPE_FORMAT_LATC1_SNORM:
         case PIPE_FORMAT_LATC2_SNORM:
            min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
            max_clamp = vec4_bld.one;
            break;
         default:
            assert(0);
            break;
         }
      }
      /*
       * all others from subsampled/other group, though we don't care
       * about yuv (and should not have any from zs here)
       */
      else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){
         switch (format_desc->format) {
         case PIPE_FORMAT_R8G8_B8G8_UNORM:
         case PIPE_FORMAT_G8R8_G8B8_UNORM:
         case PIPE_FORMAT_G8R8_B8R8_UNORM:
         case PIPE_FORMAT_R8G8_R8B8_UNORM:
         case PIPE_FORMAT_R1_UNORM: /* doesn't make sense but ah well */
            min_clamp = vec4_bld.zero;
            max_clamp = vec4_bld.one;
            break;
         case PIPE_FORMAT_R8G8Bx_SNORM:
            min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
            max_clamp = vec4_bld.one;
            break;
         /*
          * Note smallfloat formats usually don't need clamping
          * (they still have infinite range) however this is not
          * true for r11g11b10 and r9g9b9e5, which can't represent
          * negative numbers (and additionally r9g9b9e5 can't represent
          * very large numbers). d3d10 seems happy without clamping in
          * this case, but gl spec is pretty clear: "for floating
          * point and integer formats, border values are clamped to
          * the representable range of the format" so do that here.
          */
         case PIPE_FORMAT_R11G11B10_FLOAT:
            min_clamp = vec4_bld.zero;
            break;
         case PIPE_FORMAT_R9G9B9E5_FLOAT:
            min_clamp = vec4_bld.zero;
            max_clamp = lp_build_const_vec(gallivm, vec4_type, MAX_RGB9E5);
            break;
         default:
            assert(0);
            break;
         }
      }
   }

   /* apply whichever clamp bounds were determined above */
   if (min_clamp) {
      border_color = lp_build_max(&vec4_bld, border_color, min_clamp);
   }
   if (max_clamp) {
      border_color = lp_build_min(&vec4_bld, border_color, max_clamp);
   }

   bld->border_color_clamped = border_color;
}
1719
1720
1721 /**
1722 * General texture sampling codegen.
1723 * This function handles texture sampling for all texture targets (1D,
1724 * 2D, 3D, cube) and all filtering modes.
1725 */
1726 static void
1727 lp_build_sample_general(struct lp_build_sample_context *bld,
1728 unsigned sampler_unit,
1729 LLVMValueRef *coords,
1730 const LLVMValueRef *offsets,
1731 LLVMValueRef lod_positive,
1732 LLVMValueRef lod_fpart,
1733 LLVMValueRef ilevel0,
1734 LLVMValueRef ilevel1,
1735 LLVMValueRef *colors_out)
1736 {
1737 LLVMBuilderRef builder = bld->gallivm->builder;
1738 const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state;
1739 const unsigned mip_filter = sampler_state->min_mip_filter;
1740 const unsigned min_filter = sampler_state->min_img_filter;
1741 const unsigned mag_filter = sampler_state->mag_img_filter;
1742 LLVMValueRef texels[4];
1743 unsigned chan;
1744
1745 /* if we need border color, (potentially) clamp it now */
1746 if (lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_s,
1747 min_filter,
1748 mag_filter) ||
1749 (bld->dims > 1 &&
1750 lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_t,
1751 min_filter,
1752 mag_filter)) ||
1753 (bld->dims > 2 &&
1754 lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_r,
1755 min_filter,
1756 mag_filter))) {
1757 lp_build_clamp_border_color(bld, sampler_unit);
1758 }
1759
1760
1761 /*
1762 * Get/interpolate texture colors.
1763 */
1764
1765 for (chan = 0; chan < 4; ++chan) {
1766 texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, "");
1767 lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]);
1768 }
1769
1770 if (min_filter == mag_filter) {
1771 /* no need to distinguish between minification and magnification */
1772 lp_build_sample_mipmap(bld, min_filter, mip_filter,
1773 coords, offsets,
1774 ilevel0, ilevel1, lod_fpart,
1775 texels);
1776 }
1777 else {
1778 /*
1779 * Could also get rid of the if-logic and always use mipmap_both, both
1780 * for the single lod and multi-lod case if nothing really uses this.
1781 */
1782 if (bld->num_lods == 1) {
1783 /* Emit conditional to choose min image filter or mag image filter
1784 * depending on the lod being > 0 or <= 0, respectively.
1785 */
1786 struct lp_build_if_state if_ctx;
1787
1788 lod_positive = LLVMBuildTrunc(builder, lod_positive,
1789 LLVMInt1TypeInContext(bld->gallivm->context), "");
1790
1791 lp_build_if(&if_ctx, bld->gallivm, lod_positive);
1792 {
1793 /* Use the minification filter */
1794 lp_build_sample_mipmap(bld, min_filter, mip_filter,
1795 coords, offsets,
1796 ilevel0, ilevel1, lod_fpart,
1797 texels);
1798 }
1799 lp_build_else(&if_ctx);
1800 {
1801 /* Use the magnification filter */
1802 lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE,
1803 coords, offsets,
1804 ilevel0, NULL, NULL,
1805 texels);
1806 }
1807 lp_build_endif(&if_ctx);
1808 }
1809 else {
1810 LLVMValueRef need_linear, linear_mask;
1811 unsigned mip_filter_for_nearest;
1812 struct lp_build_if_state if_ctx;
1813
1814 if (min_filter == PIPE_TEX_FILTER_LINEAR) {
1815 linear_mask = lod_positive;
1816 mip_filter_for_nearest = PIPE_TEX_MIPFILTER_NONE;
1817 }
1818 else {
1819 linear_mask = lp_build_not(&bld->lodi_bld, lod_positive);
1820 mip_filter_for_nearest = mip_filter;
1821 }
1822 need_linear = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
1823 linear_mask);
1824
1825 if (bld->num_lods != bld->coord_type.length) {
1826 linear_mask = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1827 bld->lodi_type,
1828 bld->int_coord_type,
1829 linear_mask);
1830 }
1831
1832 lp_build_if(&if_ctx, bld->gallivm, need_linear);
1833 {
1834 /*
1835 * Do sampling with both filters simultaneously. This means using
1836 * a linear filter and doing some tricks (with weights) for the pixels
1837 * which need nearest filter.
1838 * Note that it's probably rare some pixels need nearest and some
1839 * linear filter but the fixups required for the nearest pixels
1840 * aren't all that complicated so just always run a combined path
1841 * if at least some pixels require linear.
1842 */
1843 lp_build_sample_mipmap_both(bld, linear_mask, mip_filter,
1844 coords, offsets,
1845 ilevel0, ilevel1,
1846 lod_fpart, lod_positive,
1847 texels);
1848 }
1849 lp_build_else(&if_ctx);
1850 {
1851 /*
1852 * All pixels require just nearest filtering, which is way
1853 * cheaper than linear, hence do a separate path for that.
1854 */
1855 lp_build_sample_mipmap(bld, PIPE_TEX_FILTER_NEAREST,
1856 mip_filter_for_nearest,
1857 coords, offsets,
1858 ilevel0, ilevel1, lod_fpart,
1859 texels);
1860 }
1861 lp_build_endif(&if_ctx);
1862 }
1863 }
1864
1865 for (chan = 0; chan < 4; ++chan) {
1866 colors_out[chan] = LLVMBuildLoad(builder, texels[chan], "");
1867 lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]);
1868 }
1869 }
1870
1871
1872 /**
1873 * Texel fetch function.
1874 * In contrast to general sampling there is no filtering, no coord minification,
1875 * lod (if any) is always explicit uint, coords are uints (in terms of texel units)
1876 * directly to be applied to the selected mip level (after adding texel offsets).
1877 * This function handles texel fetch for all targets where texel fetch is supported
1878 * (no cube maps, but 1d, 2d, 3d are supported, arrays and buffers should be too).
1879 */
1880 static void
1881 lp_build_fetch_texel(struct lp_build_sample_context *bld,
1882 unsigned texture_unit,
1883 const LLVMValueRef *coords,
1884 LLVMValueRef explicit_lod,
1885 const LLVMValueRef *offsets,
1886 LLVMValueRef *colors_out)
1887 {
1888 struct lp_build_context *perquadi_bld = &bld->lodi_bld;
1889 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
1890 unsigned dims = bld->dims, chan;
1891 unsigned target = bld->static_texture_state->target;
1892 boolean out_of_bound_ret_zero = TRUE;
1893 LLVMValueRef size, ilevel;
1894 LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
1895 LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
1896 LLVMValueRef width, height, depth, i, j;
1897 LLVMValueRef offset, out_of_bounds, out1;
1898
1899 out_of_bounds = int_coord_bld->zero;
1900
1901 if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
1902 if (bld->num_mips != int_coord_bld->type.length) {
1903 ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
1904 perquadi_bld->type, explicit_lod, 0);
1905 }
1906 else {
1907 ilevel = explicit_lod;
1908 }
1909 lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel,
1910 out_of_bound_ret_zero ? &out_of_bounds : NULL);
1911 }
1912 else {
1913 assert(bld->num_mips == 1);
1914 if (bld->static_texture_state->target != PIPE_BUFFER) {
1915 ilevel = bld->dynamic_state->first_level(bld->dynamic_state,
1916 bld->gallivm, texture_unit);
1917 }
1918 else {
1919 ilevel = lp_build_const_int32(bld->gallivm, 0);
1920 }
1921 }
1922 lp_build_mipmap_level_sizes(bld, ilevel,
1923 &size,
1924 &row_stride_vec, &img_stride_vec);
1925 lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
1926 size, &width, &height, &depth);
1927
1928 if (target == PIPE_TEXTURE_1D_ARRAY ||
1929 target == PIPE_TEXTURE_2D_ARRAY) {
1930 if (out_of_bound_ret_zero) {
1931 z = lp_build_layer_coord(bld, texture_unit, z, &out1);
1932 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1933 }
1934 else {
1935 z = lp_build_layer_coord(bld, texture_unit, z, NULL);
1936 }
1937 }
1938
1939 /* This is a lot like border sampling */
1940 if (offsets[0]) {
1941 /*
1942 * coords are really unsigned, offsets are signed, but I don't think
1943 * exceeding 31 bits is possible
1944 */
1945 x = lp_build_add(int_coord_bld, x, offsets[0]);
1946 }
1947 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
1948 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1949 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
1950 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1951
1952 if (dims >= 2) {
1953 if (offsets[1]) {
1954 y = lp_build_add(int_coord_bld, y, offsets[1]);
1955 }
1956 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
1957 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1958 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
1959 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1960
1961 if (dims >= 3) {
1962 if (offsets[2]) {
1963 z = lp_build_add(int_coord_bld, z, offsets[2]);
1964 }
1965 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
1966 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1967 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
1968 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1969 }
1970 }
1971
1972 lp_build_sample_offset(int_coord_bld,
1973 bld->format_desc,
1974 x, y, z, row_stride_vec, img_stride_vec,
1975 &offset, &i, &j);
1976
1977 if (bld->static_texture_state->target != PIPE_BUFFER) {
1978 offset = lp_build_add(int_coord_bld, offset,
1979 lp_build_get_mip_offsets(bld, ilevel));
1980 }
1981
1982 offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);
1983
1984 lp_build_fetch_rgba_soa(bld->gallivm,
1985 bld->format_desc,
1986 bld->texel_type,
1987 bld->base_ptr, offset,
1988 i, j,
1989 colors_out);
1990
1991 if (out_of_bound_ret_zero) {
1992 /*
1993 * Only needed for ARB_robust_buffer_access_behavior and d3d10.
1994 * Could use min/max above instead of out-of-bounds comparisons
1995 * if we don't care about the result returned for out-of-bounds.
1996 */
1997 for (chan = 0; chan < 4; chan++) {
1998 colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
1999 bld->texel_bld.zero, colors_out[chan]);
2000 }
2001 }
2002 }
2003
2004
2005 /**
2006 * Just set texels to white instead of actually sampling the texture.
2007 * For debugging.
2008 */
2009 void
2010 lp_build_sample_nop(struct gallivm_state *gallivm,
2011 struct lp_type type,
2012 const LLVMValueRef *coords,
2013 LLVMValueRef texel_out[4])
2014 {
2015 LLVMValueRef one = lp_build_one(gallivm, type);
2016 unsigned chan;
2017
2018 for (chan = 0; chan < 4; chan++) {
2019 texel_out[chan] = one;
2020 }
2021 }
2022
2023
2024 /**
2025 * Build texture sampling code.
2026 * 'texel' will return a vector of four LLVMValueRefs corresponding to
2027 * R, G, B, A.
2028 * \param type vector float type to use for coords, etc.
2029 * \param is_fetch if this is a texel fetch instruction.
2030 * \param derivs partial derivatives of (s,t,r,q) with respect to x and y
2031 */
2032 void
2033 lp_build_sample_soa(struct gallivm_state *gallivm,
2034 const struct lp_static_texture_state *static_texture_state,
2035 const struct lp_static_sampler_state *static_sampler_state,
2036 struct lp_sampler_dynamic_state *dynamic_state,
2037 struct lp_type type,
2038 boolean is_fetch,
2039 unsigned texture_index,
2040 unsigned sampler_index,
2041 const LLVMValueRef *coords,
2042 const LLVMValueRef *offsets,
2043 const struct lp_derivatives *derivs, /* optional */
2044 LLVMValueRef lod_bias, /* optional */
2045 LLVMValueRef explicit_lod, /* optional */
2046 enum lp_sampler_lod_property lod_property,
2047 LLVMValueRef texel_out[4])
2048 {
2049 unsigned target = static_texture_state->target;
2050 unsigned dims = texture_dims(target);
2051 unsigned num_quads = type.length / 4;
2052 unsigned mip_filter, min_img_filter, mag_img_filter, i;
2053 struct lp_build_sample_context bld;
2054 struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
2055 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
2056 LLVMBuilderRef builder = gallivm->builder;
2057 LLVMValueRef tex_width, newcoords[5];
2058
2059 if (0) {
2060 enum pipe_format fmt = static_texture_state->format;
2061 debug_printf("Sample from %s\n", util_format_name(fmt));
2062 }
2063
2064 if (static_texture_state->format == PIPE_FORMAT_NONE) {
2065 /*
2066 * If there's nothing bound, format is NONE, and we must return
2067 * all zero as mandated by d3d10 in this case.
2068 */
2069 unsigned chan;
2070 LLVMValueRef zero = lp_build_const_vec(gallivm, type, 0.0F);
2071 for (chan = 0; chan < 4; chan++) {
2072 texel_out[chan] = zero;
2073 }
2074 return;
2075 }
2076
2077 assert(type.floating);
2078
2079 /* Setup our build context */
2080 memset(&bld, 0, sizeof bld);
2081 bld.gallivm = gallivm;
2082 bld.static_sampler_state = &derived_sampler_state;
2083 bld.static_texture_state = static_texture_state;
2084 bld.dynamic_state = dynamic_state;
2085 bld.format_desc = util_format_description(static_texture_state->format);
2086 bld.dims = dims;
2087
2088 bld.vector_width = lp_type_width(type);
2089
2090 bld.float_type = lp_type_float(32);
2091 bld.int_type = lp_type_int(32);
2092 bld.coord_type = type;
2093 bld.int_coord_type = lp_int_type(type);
2094 bld.float_size_in_type = lp_type_float(32);
2095 bld.float_size_in_type.length = dims > 1 ? 4 : 1;
2096 bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
2097 bld.texel_type = type;
2098
2099 /* always using the first channel hopefully should be safe,
2100 * if not things WILL break in other places anyway.
2101 */
2102 if (bld.format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
2103 bld.format_desc->channel[0].pure_integer) {
2104 if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
2105 bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
2106 }
2107 else if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
2108 bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length);
2109 }
2110 }
2111 else if (util_format_has_stencil(bld.format_desc) &&
2112 !util_format_has_depth(bld.format_desc)) {
2113 /* for stencil only formats, sample stencil (uint) */
2114 bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
2115 }
2116
2117 if (!static_texture_state->level_zero_only) {
2118 derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
2119 } else {
2120 derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
2121 }
2122 mip_filter = derived_sampler_state.min_mip_filter;
2123
2124 if (0) {
2125 debug_printf(" .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter);
2126 }
2127
2128 if (static_texture_state->target == PIPE_TEXTURE_CUBE ||
2129 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY)
2130 {
2131 /*
2132 * Seamless filtering ignores wrap modes.
2133 * Setting to CLAMP_TO_EDGE is correct for nearest filtering, for
2134 * bilinear it's not correct but way better than using for instance repeat.
2135 * Note we even set this for non-seamless. Technically GL allows any wrap
2136 * mode, which made sense when supporting true borders (can get seamless
2137 * effect with border and CLAMP_TO_BORDER), but gallium doesn't support
2138 * borders and d3d9 requires wrap modes to be ignored and it's a pain to fix
2139 * up the sampler state (as it makes it texture dependent).
2140 */
2141 derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2142 derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2143 }
2144
2145 min_img_filter = derived_sampler_state.min_img_filter;
2146 mag_img_filter = derived_sampler_state.mag_img_filter;
2147
2148
2149 /*
2150 * This is all a bit complicated different paths are chosen for performance
2151 * reasons.
2152 * Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for
2153 * everything (the last two options are equivalent for 4-wide case).
2154 * If there's per-quad lod but we split to 4-wide so we can use AoS, per-quad
2155 * lod is calculated then the lod value extracted afterwards so making this
2156 * case basically the same as far as lod handling is concerned for the
2157 * further sample/filter code as the 1 lod for everything case.
2158 * Different lod handling mostly shows up when building mipmap sizes
2159 * (lp_build_mipmap_level_sizes() and friends) and also in filtering
2160 * (getting the fractional part of the lod to the right texels).
2161 */
2162
2163 /*
2164 * There are other situations where at least the multiple int lods could be
2165 * avoided like min and max lod being equal.
2166 */
2167 bld.num_mips = bld.num_lods = 1;
2168
2169 if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
2170 (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
2171 (static_texture_state->target == PIPE_TEXTURE_CUBE) &&
2172 (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2173 /*
2174 * special case for using per-pixel lod even for implicit lod,
2175 * which is generally never required (ok by APIs) except to please
2176 * some (somewhat broken imho) tests (because per-pixel face selection
2177 * can cause derivatives to be different for pixels outside the primitive
2178 * due to the major axis division even if pre-project derivatives are
2179 * looking normal).
2180 */
2181 bld.num_mips = type.length;
2182 bld.num_lods = type.length;
2183 }
2184 else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT ||
2185 (explicit_lod || lod_bias || derivs)) {
2186 if ((is_fetch && target != PIPE_BUFFER) ||
2187 (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2188 bld.num_mips = type.length;
2189 bld.num_lods = type.length;
2190 }
2191 else if (!is_fetch && min_img_filter != mag_img_filter) {
2192 bld.num_mips = 1;
2193 bld.num_lods = type.length;
2194 }
2195 }
2196 /* TODO: for true scalar_lod should only use 1 lod value */
2197 else if ((is_fetch && explicit_lod && target != PIPE_BUFFER) ||
2198 (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2199 bld.num_mips = num_quads;
2200 bld.num_lods = num_quads;
2201 }
2202 else if (!is_fetch && min_img_filter != mag_img_filter) {
2203 bld.num_mips = 1;
2204 bld.num_lods = num_quads;
2205 }
2206
2207
2208 bld.lodf_type = type;
2209 /* we want native vector size to be able to use our intrinsics */
2210 if (bld.num_lods != type.length) {
2211 /* TODO: this currently always has to be per-quad or per-element */
2212 bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
2213 }
2214 bld.lodi_type = lp_int_type(bld.lodf_type);
2215 bld.levelf_type = bld.lodf_type;
2216 if (bld.num_mips == 1) {
2217 bld.levelf_type.length = 1;
2218 }
2219 bld.leveli_type = lp_int_type(bld.levelf_type);
2220 bld.float_size_type = bld.float_size_in_type;
2221 /* Note: size vectors may not be native. They contain minified w/h/d/_ values,
2222 * with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to 8x4f32 */
2223 if (bld.num_mips > 1) {
2224 bld.float_size_type.length = bld.num_mips == type.length ?
2225 bld.num_mips * bld.float_size_in_type.length :
2226 type.length;
2227 }
2228 bld.int_size_type = lp_int_type(bld.float_size_type);
2229
2230 lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
2231 lp_build_context_init(&bld.float_vec_bld, gallivm, type);
2232 lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
2233 lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
2234 lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
2235 lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
2236 lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
2237 lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
2238 lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
2239 lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
2240 lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
2241 lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
2242 lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
2243 lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);
2244
2245 /* Get the dynamic state */
2246 tex_width = dynamic_state->width(dynamic_state, gallivm, texture_index);
2247 bld.row_stride_array = dynamic_state->row_stride(dynamic_state, gallivm, texture_index);
2248 bld.img_stride_array = dynamic_state->img_stride(dynamic_state, gallivm, texture_index);
2249 bld.base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm, texture_index);
2250 bld.mip_offsets = dynamic_state->mip_offsets(dynamic_state, gallivm, texture_index);
2251 /* Note that mip_offsets is an array[level] of offsets to texture images */
2252
2253 /* width, height, depth as single int vector */
2254 if (dims <= 1) {
2255 bld.int_size = tex_width;
2256 }
2257 else {
2258 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
2259 tex_width, LLVMConstInt(i32t, 0, 0), "");
2260 if (dims >= 2) {
2261 LLVMValueRef tex_height =
2262 dynamic_state->height(dynamic_state, gallivm, texture_index);
2263 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
2264 tex_height, LLVMConstInt(i32t, 1, 0), "");
2265 if (dims >= 3) {
2266 LLVMValueRef tex_depth =
2267 dynamic_state->depth(dynamic_state, gallivm, texture_index);
2268 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
2269 tex_depth, LLVMConstInt(i32t, 2, 0), "");
2270 }
2271 }
2272 }
2273
2274 for (i = 0; i < 5; i++) {
2275 newcoords[i] = coords[i];
2276 }
2277
2278 if (0) {
2279 /* For debug: no-op texture sampling */
2280 lp_build_sample_nop(gallivm,
2281 bld.texel_type,
2282 newcoords,
2283 texel_out);
2284 }
2285
2286 else if (is_fetch) {
2287 lp_build_fetch_texel(&bld, texture_index, newcoords,
2288 explicit_lod, offsets,
2289 texel_out);
2290 }
2291
2292 else {
2293 LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
2294 LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
2295 boolean use_aos = util_format_fits_8unorm(bld.format_desc) &&
2296 /* not sure this is strictly needed or simply impossible */
2297 derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE &&
2298 lp_is_simple_wrap_mode(derived_sampler_state.wrap_s);
2299
2300 use_aos &= bld.num_lods <= num_quads ||
2301 derived_sampler_state.min_img_filter ==
2302 derived_sampler_state.mag_img_filter;
2303 if (dims > 1) {
2304 use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t);
2305 if (dims > 2) {
2306 use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r);
2307 }
2308 }
2309
2310 if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
2311 !use_aos && util_format_fits_8unorm(bld.format_desc)) {
2312 debug_printf("%s: using floating point linear filtering for %s\n",
2313 __FUNCTION__, bld.format_desc->short_name);
2314 debug_printf(" min_img %d mag_img %d mip %d wraps %d wrapt %d wrapr %d\n",
2315 derived_sampler_state.min_img_filter,
2316 derived_sampler_state.mag_img_filter,
2317 derived_sampler_state.min_mip_filter,
2318 derived_sampler_state.wrap_s,
2319 derived_sampler_state.wrap_t,
2320 derived_sampler_state.wrap_r);
2321 }
2322
2323 lp_build_sample_common(&bld, texture_index, sampler_index,
2324 newcoords,
2325 derivs, lod_bias, explicit_lod,
2326 &lod_positive, &lod_fpart,
2327 &ilevel0, &ilevel1);
2328
2329 /*
2330 * we only try 8-wide sampling with soa as it appears to
2331 * be a loss with aos with AVX (but it should work, except
2332 * for conformance if min_filter != mag_filter if num_lods > 1).
2333 * (It should be faster if we'd support avx2)
2334 */
2335 if (num_quads == 1 || !use_aos) {
2336 if (use_aos) {
2337 /* do sampling/filtering with fixed pt arithmetic */
2338 lp_build_sample_aos(&bld, sampler_index,
2339 newcoords[0], newcoords[1],
2340 newcoords[2],
2341 offsets, lod_positive, lod_fpart,
2342 ilevel0, ilevel1,
2343 texel_out);
2344 }
2345
2346 else {
2347 lp_build_sample_general(&bld, sampler_index,
2348 newcoords, offsets,
2349 lod_positive, lod_fpart,
2350 ilevel0, ilevel1,
2351 texel_out);
2352 }
2353 }
2354 else {
2355 unsigned j;
2356 struct lp_build_sample_context bld4;
2357 struct lp_type type4 = type;
2358 unsigned i;
2359 LLVMValueRef texelout4[4];
2360 LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
2361
2362 type4.length = 4;
2363
2364 /* Setup our build context */
2365 memset(&bld4, 0, sizeof bld4);
2366 bld4.gallivm = bld.gallivm;
2367 bld4.static_texture_state = bld.static_texture_state;
2368 bld4.static_sampler_state = bld.static_sampler_state;
2369 bld4.dynamic_state = bld.dynamic_state;
2370 bld4.format_desc = bld.format_desc;
2371 bld4.dims = bld.dims;
2372 bld4.row_stride_array = bld.row_stride_array;
2373 bld4.img_stride_array = bld.img_stride_array;
2374 bld4.base_ptr = bld.base_ptr;
2375 bld4.mip_offsets = bld.mip_offsets;
2376 bld4.int_size = bld.int_size;
2377
2378 bld4.vector_width = lp_type_width(type4);
2379
2380 bld4.float_type = lp_type_float(32);
2381 bld4.int_type = lp_type_int(32);
2382 bld4.coord_type = type4;
2383 bld4.int_coord_type = lp_int_type(type4);
2384 bld4.float_size_in_type = lp_type_float(32);
2385 bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
2386 bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
2387 bld4.texel_type = bld.texel_type;
2388 bld4.texel_type.length = 4;
2389
2390 bld4.num_mips = bld4.num_lods = 1;
2391 if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
2392 (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
2393 (static_texture_state->target == PIPE_TEXTURE_CUBE) &&
2394 (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2395 bld4.num_mips = type4.length;
2396 bld4.num_lods = type4.length;
2397 }
2398 if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
2399 (explicit_lod || lod_bias || derivs)) {
2400 if ((is_fetch && target != PIPE_BUFFER) ||
2401 (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2402 bld4.num_mips = type4.length;
2403 bld4.num_lods = type4.length;
2404 }
2405 else if (!is_fetch && min_img_filter != mag_img_filter) {
2406 bld4.num_mips = 1;
2407 bld4.num_lods = type4.length;
2408 }
2409 }
2410
2411 /* we want native vector size to be able to use our intrinsics */
2412 bld4.lodf_type = type4;
2413 if (bld4.num_lods != type4.length) {
2414 bld4.lodf_type.length = 1;
2415 }
2416 bld4.lodi_type = lp_int_type(bld4.lodf_type);
2417 bld4.levelf_type = type4;
2418 if (bld4.num_mips != type4.length) {
2419 bld4.levelf_type.length = 1;
2420 }
2421 bld4.leveli_type = lp_int_type(bld4.levelf_type);
2422 bld4.float_size_type = bld4.float_size_in_type;
2423 if (bld4.num_mips > 1) {
2424 bld4.float_size_type.length = bld4.num_mips == type4.length ?
2425 bld4.num_mips * bld4.float_size_in_type.length :
2426 type4.length;
2427 }
2428 bld4.int_size_type = lp_int_type(bld4.float_size_type);
2429
2430 lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
2431 lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
2432 lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
2433 lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
2434 lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
2435 lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
2436 lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
2437 lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
2438 lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
2439 lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
2440 lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
2441 lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
2442 lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
2443 lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);
2444
2445 for (i = 0; i < num_quads; i++) {
2446 LLVMValueRef s4, t4, r4;
2447 LLVMValueRef lod_positive4, lod_fpart4 = NULL;
2448 LLVMValueRef ilevel04, ilevel14 = NULL;
2449 LLVMValueRef offsets4[4] = { NULL };
2450 unsigned num_lods = bld4.num_lods;
2451
2452 s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4);
2453 t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4);
2454 r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4);
2455
2456 if (offsets[0]) {
2457 offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4);
2458 if (dims > 1) {
2459 offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4);
2460 if (dims > 2) {
2461 offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4);
2462 }
2463 }
2464 }
2465 lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods);
2466 ilevel04 = bld.num_mips == 1 ? ilevel0 :
2467 lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
2468 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
2469 ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
2470 lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
2471 }
2472
2473 if (use_aos) {
2474 /* do sampling/filtering with fixed pt arithmetic */
2475 lp_build_sample_aos(&bld4, sampler_index,
2476 s4, t4, r4, offsets4,
2477 lod_positive4, lod_fpart4,
2478 ilevel04, ilevel14,
2479 texelout4);
2480 }
2481
2482 else {
2483 /* this path is currently unreachable and hence might break easily... */
2484 LLVMValueRef newcoords4[5];
2485 newcoords4[0] = s4;
2486 newcoords4[1] = t4;
2487 newcoords4[2] = r4;
2488 newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4);
2489 newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4);
2490
2491 lp_build_sample_general(&bld4, sampler_index,
2492 newcoords4, offsets4,
2493 lod_positive4, lod_fpart4,
2494 ilevel04, ilevel14,
2495 texelout4);
2496 }
2497 for (j = 0; j < 4; j++) {
2498 texelouttmp[j][i] = texelout4[j];
2499 }
2500 }
2501
2502 for (j = 0; j < 4; j++) {
2503 texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
2504 }
2505 }
2506 }
2507
2508 if (target != PIPE_BUFFER) {
2509 apply_sampler_swizzle(&bld, texel_out);
2510 }
2511
2512 /*
2513 * texel type can be a (32bit) int/uint (for pure int formats only),
2514 * however we are expected to always return floats (storage is untyped).
2515 */
2516 if (!bld.texel_type.floating) {
2517 unsigned chan;
2518 for (chan = 0; chan < 4; chan++) {
2519 texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan],
2520 lp_build_vec_type(gallivm, type), "");
2521 }
2522 }
2523 }
2524
/**
 * Build code for a texture size / resource info query (TXQ / SVIEWINFO).
 *
 * \param int_type      integer vector type of the result
 * \param texture_unit  texture to query
 * \param target        shader-declared texture target
 * \param is_sviewinfo  TRUE for d3d10-style sviewinfo (zero-fills unused
 *                      channels and returns mip count in .w)
 * \param explicit_lod  optional explicit mip level to query sizes for
 * \param sizes_out     receives width/height/depth (minified), array size
 *                      and (sviewinfo only) number of mip levels
 */
void
lp_build_size_query_soa(struct gallivm_state *gallivm,
                        const struct lp_static_texture_state *static_state,
                        struct lp_sampler_dynamic_state *dynamic_state,
                        struct lp_type int_type,
                        unsigned texture_unit,
                        unsigned target,
                        boolean is_sviewinfo,
                        enum lp_sampler_lod_property lod_property,
                        LLVMValueRef explicit_lod,
                        LLVMValueRef *sizes_out)
{
   LLVMValueRef lod, level, size;
   LLVMValueRef first_level = NULL;
   int dims, i;
   boolean has_array;
   /* per-element lod queries not implemented yet; everything uses one lod */
   unsigned num_lods = 1;
   struct lp_build_context bld_int_vec4;

   if (static_state->format == PIPE_FORMAT_NONE) {
      /*
       * If there's nothing bound, format is NONE, and we must return
       * all zero as mandated by d3d10 in this case.
       */
      unsigned chan;
      LLVMValueRef zero = lp_build_const_vec(gallivm, int_type, 0.0F);
      for (chan = 0; chan < 4; chan++) {
         sizes_out[chan] = zero;
      }
      return;
   }

   /*
    * Do some sanity verification about bound texture and shader dcl target.
    * Not entirely sure what's possible but assume array/non-array
    * always compatible (probably not ok for OpenGL but d3d10 has no
    * distinction of arrays at the resource level).
    * Everything else looks bogus (though not entirely sure about rect/2d).
    * Currently disabled because it causes assertion failures if there's
    * nothing bound (or rather a dummy texture, not that this case would
    * return the right values).
    */
   if (0 && static_state->target != target) {
      if (static_state->target == PIPE_TEXTURE_1D)
         assert(target == PIPE_TEXTURE_1D_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_1D_ARRAY)
         assert(target == PIPE_TEXTURE_1D);
      else if (static_state->target == PIPE_TEXTURE_2D)
         assert(target == PIPE_TEXTURE_2D_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_2D_ARRAY)
         assert(target == PIPE_TEXTURE_2D);
      else if (static_state->target == PIPE_TEXTURE_CUBE)
         assert(target == PIPE_TEXTURE_CUBE_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY)
         assert(target == PIPE_TEXTURE_CUBE);
      else
         assert(0);
   }

   dims = texture_dims(target);

   /* array targets report the layer count in the channel after the dims */
   switch (target) {
   case PIPE_TEXTURE_1D_ARRAY:
   case PIPE_TEXTURE_2D_ARRAY:
      has_array = TRUE;
      break;
   default:
      has_array = FALSE;
      break;
   }

   assert(!int_type.floating);

   lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));

   if (explicit_lod) {
      /* FIXME: this needs to honor per-element lod */
      lod = LLVMBuildExtractElement(gallivm->builder, explicit_lod, lp_build_const_int32(gallivm, 0), "");
      first_level = dynamic_state->first_level(dynamic_state, gallivm, texture_unit);
      /* shader lod is relative to the view's first_level */
      level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level");
      lod = lp_build_broadcast_scalar(&bld_int_vec4, level);
   } else {
      lod = bld_int_vec4.zero;
   }

   /* assemble base-level w/h/d into one 4x i32 vector, then minify by lod */
   size = bld_int_vec4.undef;

   size = LLVMBuildInsertElement(gallivm->builder, size,
                                 dynamic_state->width(dynamic_state, gallivm, texture_unit),
                                 lp_build_const_int32(gallivm, 0), "");

   if (dims >= 2) {
      size = LLVMBuildInsertElement(gallivm->builder, size,
                                    dynamic_state->height(dynamic_state, gallivm, texture_unit),
                                    lp_build_const_int32(gallivm, 1), "");
   }

   if (dims >= 3) {
      size = LLVMBuildInsertElement(gallivm->builder, size,
                                    dynamic_state->depth(dynamic_state, gallivm, texture_unit),
                                    lp_build_const_int32(gallivm, 2), "");
   }

   size = lp_build_minify(&bld_int_vec4, size, lod);

   /* array layer count is not minified; stored in the depth slot */
   if (has_array)
      size = LLVMBuildInsertElement(gallivm->builder, size,
                                    dynamic_state->depth(dynamic_state, gallivm, texture_unit),
                                    lp_build_const_int32(gallivm, dims), "");

   /*
    * d3d10 requires zero for x/y/z values (but not w, i.e. mip levels)
    * if level is out of bounds (note this can't cover unbound texture
    * here, which also requires returning zero).
    */
   if (explicit_lod && is_sviewinfo) {
      LLVMValueRef last_level, out, out1;
      struct lp_build_context leveli_bld;

      /* everything is scalar for now */
      lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32));
      last_level = dynamic_state->last_level(dynamic_state, gallivm, texture_unit);

      out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level);
      out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level);
      out = lp_build_or(&leveli_bld, out, out1);
      if (num_lods == 1) {
         out = lp_build_broadcast_scalar(&bld_int_vec4, out);
      }
      else {
         /* TODO */
         assert(0);
      }
      /* zero out size channels where the level was out of range */
      size = lp_build_andnot(&bld_int_vec4, size, out);
   }
   /* broadcast each queried channel out to the requested vector type */
   for (i = 0; i < dims + (has_array ? 1 : 0); i++) {
      sizes_out[i] = lp_build_extract_broadcast(gallivm, bld_int_vec4.type, int_type,
                                                size,
                                                lp_build_const_int32(gallivm, i));
   }
   if (is_sviewinfo) {
      for (; i < 4; i++) {
         sizes_out[i] = lp_build_const_vec(gallivm, int_type, 0.0);
      }
   }

   /*
    * if there's no explicit_lod (buffers, rects) queries requiring nr of
    * mips would be illegal.
    */
   if (is_sviewinfo && explicit_lod) {
      struct lp_build_context bld_int_scalar;
      LLVMValueRef num_levels;
      lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32));

      if (static_state->level_zero_only) {
         num_levels = bld_int_scalar.one;
      }
      else {
         LLVMValueRef last_level;

         last_level = dynamic_state->last_level(dynamic_state, gallivm, texture_unit);
         /* mip count = last_level - first_level + 1 */
         num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level);
         num_levels = lp_build_add(&bld_int_scalar, num_levels, bld_int_scalar.one);
      }
      sizes_out[3] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, int_type),
                                        num_levels);
   }
}