src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * Texture sampling -- SoA.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  * @author Brian Paul <brianp@vmware.com>
  34  */
  35
  36 #include "pipe/p_defines.h"
  37 #include "pipe/p_state.h"
  38 #include "pipe/p_shader_tokens.h"
  39 #include "util/u_debug.h"
  40 #include "util/u_dump.h"
  41 #include "util/u_memory.h"
  42 #include "util/u_math.h"
  43 #include "util/u_format.h"
  44 #include "util/u_cpu_detect.h"
  45 #include "lp_bld_debug.h"
  46 #include "lp_bld_type.h"
  47 #include "lp_bld_const.h"
  48 #include "lp_bld_conv.h"
  49 #include "lp_bld_arit.h"
  50 #include "lp_bld_bitarit.h"
  51 #include "lp_bld_logic.h"
  52 #include "lp_bld_printf.h"
  53 #include "lp_bld_swizzle.h"
  54 #include "lp_bld_flow.h"
  55 #include "lp_bld_gather.h"
  56 #include "lp_bld_format.h"
  57 #include "lp_bld_sample.h"
  58 #include "lp_bld_sample_aos.h"
  59 #include "lp_bld_struct.h"
  60 #include "lp_bld_quad.h"
  61 #include "lp_bld_pack.h"
  62
  63
  64 /**
  65  * Generate code to fetch a texel from a texture at int coords (x, y, z).
  66  * The computation depends on whether the texture is 1D, 2D or 3D.
  67  * The result, texel, will be float vectors:
  68  *   texel[0] = red values
  69  *   texel[1] = green values
  70  *   texel[2] = blue values
  71  *   texel[3] = alpha values
  72  */
  73 static void
  74 lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
  75                           unsigned sampler_unit,
  76                           LLVMValueRef width,
  77                           LLVMValueRef height,
  78                           LLVMValueRef depth,
  79                           LLVMValueRef x,
  80                           LLVMValueRef y,
  81                           LLVMValueRef z,
  82                           LLVMValueRef y_stride,
  83                           LLVMValueRef z_stride,
  84                           LLVMValueRef data_ptr,
  85                           LLVMValueRef mipoffsets,
  86                           LLVMValueRef texel_out[4])
  87 {
  88    const struct lp_static_sampler_state *static_state = bld->static_sampler_state;
  89    const unsigned dims = bld->dims;
  90    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
  91    LLVMBuilderRef builder = bld->gallivm->builder;
  92    LLVMValueRef offset;
  93    LLVMValueRef i, j;
  94    LLVMValueRef use_border = NULL;
  95
  96    /* use_border = x < 0 || x >= width || y < 0 || y >= height */
  97    if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
  98                                               static_state->min_img_filter,
  99                                               static_state->mag_img_filter)) {
 100       LLVMValueRef b1, b2;
 101       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
 102       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
 103       use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
 104    }
 105
 106    if (dims >= 2 &&
 107        lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
 108                                               static_state->min_img_filter,
 109                                               static_state->mag_img_filter)) {
 110       LLVMValueRef b1, b2;
 111       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
 112       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
 113       if (use_border) {
 114          use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
 115          use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
 116       }
 117       else {
 118          use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
 119       }
 120    }
 121
 122    if (dims == 3 &&
 123        lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
 124                                               static_state->min_img_filter,
 125                                               static_state->mag_img_filter)) {
 126       LLVMValueRef b1, b2;
 127       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
 128       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
 129       if (use_border) {
 130          use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
 131          use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
 132       }
 133       else {
 134          use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
 135       }
 136    }
 137
 138    /* convert x,y,z coords to linear offset from start of texture, in bytes */
 139    lp_build_sample_offset(&bld->int_coord_bld,
 140                           bld->format_desc,
 141                           x, y, z, y_stride, z_stride,
 142                           &offset, &i, &j);
 143    if (mipoffsets) {
 144       offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
 145    }
 146
 147    if (use_border) {
 148       /* If we can sample the border color, it means that texcoords may
 149        * lie outside the bounds of the texture image.  We need to do
 150        * something to prevent reading out of bounds and causing a segfault.
 151        *
 152        * Simply AND the texture coords with !use_border.  This will cause
 153        * coords which are out of bounds to become zero.  Zero's guaranteed
 154        * to be inside the texture image.
 155        */
 156       offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
 157    }
 158
 159    lp_build_fetch_rgba_soa(bld->gallivm,
 160                            bld->format_desc,
 161                            bld->texel_type,
 162                            data_ptr, offset,
 163                            i, j,
 164                            texel_out);
 165
 166    /*
 167     * Note: if we find an app which frequently samples the texture border
 168     * we might want to implement a true conditional here to avoid sampling
 169     * the texture whenever possible (since that's quite a bit of code).
 170     * Ex:
 171     *   if (use_border) {
 172     *      texel = border_color;
 173     *   }
 174     *   else {
 175     *      texel = sample_texture(coord);
 176     *   }
 177     * As it is now, we always sample the texture, then selectively replace
 178     * the texel color results with the border color.
 179     */
 180
 181    if (use_border) {
 182       /* select texel color or border color depending on use_border. */
 183      LLVMValueRef border_color_ptr =
 184          bld->dynamic_state->border_color(bld->dynamic_state,
 185                                           bld->gallivm, sampler_unit);
 186       const struct util_format_description *format_desc;
 187       int chan;
 188       format_desc = util_format_description(bld->static_texture_state->format);
 189       /*
 190        * Only replace channels which are actually present. The others should
 191        * get optimized away eventually by sampler_view swizzle anyway but it's
 192        * easier too as we'd need some extra logic for channels where we can't
 193        * determine the format directly otherwise.
 194        */
 195       for (chan = 0; chan < 4; chan++) {
 196          unsigned chan_s;
 197          /* reverse-map channel... */
 198          for (chan_s = 0; chan_s < 4; chan_s++) {
 199             if (chan_s == format_desc->swizzle[chan]) {
 200                break;
 201             }
 202          }
 203          if (chan_s <= 3) {
 204             LLVMValueRef border_chan =
 205                lp_build_array_get(bld->gallivm, border_color_ptr,
 206                                   lp_build_const_int32(bld->gallivm, chan));
 207             LLVMValueRef border_chan_vec =
 208                lp_build_broadcast_scalar(&bld->float_vec_bld, border_chan);
 209
 210             if (!bld->texel_type.floating) {
 211                border_chan_vec = LLVMBuildBitCast(builder, border_chan_vec,
 212                                                   bld->texel_bld.vec_type, "");
 213             }
 214             else {
 215                /*
 216                 * For normalized format need to clamp border color (technically
 217                 * probably should also quantize the data). Really sucks doing this
 218                 * here but can't avoid at least for now since this is part of
 219                 * sampler state and texture format is part of sampler_view state.
 220                 */
 221                unsigned chan_type = format_desc->channel[chan_s].type;
 222                unsigned chan_norm = format_desc->channel[chan_s].normalized;
 223                if (chan_type == UTIL_FORMAT_TYPE_SIGNED && chan_norm) {
 224                   LLVMValueRef clamp_min;
 225                   clamp_min = lp_build_const_vec(bld->gallivm, bld->texel_type, -1.0F);
 226                   border_chan_vec = lp_build_clamp(&bld->texel_bld, border_chan_vec,
 227                                                    clamp_min,
 228                                                    bld->texel_bld.one);
 229                }
 230                else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED && chan_norm) {
 231                   border_chan_vec = lp_build_clamp(&bld->texel_bld, border_chan_vec,
 232                                                    bld->texel_bld.zero,
 233                                                    bld->texel_bld.one);
 234                }
 235                /* not exactly sure about all others but I think should be ok? */
 236             }
 237             texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
 238                                               border_chan_vec, texel_out[chan]);
 239          }
 240       }
 241    }
 242 }
 243
 244
 245 /**
 246  * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
 247  */
 248 static LLVMValueRef
 249 lp_build_coord_mirror(struct lp_build_sample_context *bld,
 250                       LLVMValueRef coord)
 251 {
 252    struct lp_build_context *coord_bld = &bld->coord_bld;
 253    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 254    LLVMValueRef fract, flr, isOdd;
 255
 256    lp_build_ifloor_fract(coord_bld, coord, &flr, &fract);
 257
 258    /* isOdd = flr & 1 */
 259    isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, int_coord_bld->one, "");
 260
 261    /* make coord positive or negative depending on isOdd */
 262    coord = lp_build_set_sign(coord_bld, fract, isOdd);
 263
 264    /* convert isOdd to float */
 265    isOdd = lp_build_int_to_float(coord_bld, isOdd);
 266
 267    /* add isOdd to coord */
 268    coord = lp_build_add(coord_bld, coord, isOdd);
 269
 270    return coord;
 271 }
 272
 273
 274 /**
 275  * Helper to compute the first coord and the weight for
 276  * linear wrap repeat npot textures
 277  */
 278 void
 279 lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
 280                                   LLVMValueRef coord_f,
 281                                   LLVMValueRef length_i,
 282                                   LLVMValueRef length_f,
 283                                   LLVMValueRef *coord0_i,
 284                                   LLVMValueRef *weight_f)
 285 {
 286    struct lp_build_context *coord_bld = &bld->coord_bld;
 287    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 288    LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
 289    LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
 290                                                 int_coord_bld->one);
 291    LLVMValueRef mask;
 292    /* wrap with normalized floats is just fract */
 293    coord_f = lp_build_fract(coord_bld, coord_f);
 294    /* mul by size and subtract 0.5 */
 295    coord_f = lp_build_mul(coord_bld, coord_f, length_f);
 296    coord_f = lp_build_sub(coord_bld, coord_f, half);
 297    /*
 298     * we avoided the 0.5/length division before the repeat wrap,
 299     * now need to fix up edge cases with selects
 300     */
 301    /* convert to int, compute lerp weight */
 302    lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
 303    mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
 304                            PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
 305    *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
 306 }
 307
 308
 309 /**
 310  * Build LLVM code for texture wrap mode for linear filtering.
 311  * \param x0_out  returns first integer texcoord
 312  * \param x1_out  returns second integer texcoord
 313  * \param weight_out  returns linear interpolation weight
 314  */
 315 static void
 316 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
 317                             LLVMValueRef coord,
 318                             LLVMValueRef length,
 319                             LLVMValueRef length_f,
 320                             LLVMValueRef offset,
 321                             boolean is_pot,
 322                             unsigned wrap_mode,
 323                             LLVMValueRef *x0_out,
 324                             LLVMValueRef *x1_out,
 325                             LLVMValueRef *weight_out)
 326 {
 327    struct lp_build_context *coord_bld = &bld->coord_bld;
 328    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 329    LLVMBuilderRef builder = bld->gallivm->builder;
 330    LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
 331    LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
 332    LLVMValueRef coord0, coord1, weight;
 333
 334    switch(wrap_mode) {
 335    case PIPE_TEX_WRAP_REPEAT:
 336       if (is_pot) {
 337          /* mul by size and subtract 0.5 */
 338          coord = lp_build_mul(coord_bld, coord, length_f);
 339          coord = lp_build_sub(coord_bld, coord, half);
 340          if (offset) {
 341             offset = lp_build_int_to_float(coord_bld, offset);
 342             coord = lp_build_add(coord_bld, coord, offset);
 343          }
 344          /* convert to int, compute lerp weight */
 345          lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
 346          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 347          /* repeat wrap */
 348          coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
 349          coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
 350       }
 351       else {
 352          LLVMValueRef mask;
 353          if (offset) {
 354             offset = lp_build_int_to_float(coord_bld, offset);
 355             offset = lp_build_div(coord_bld, offset, length_f);
 356             coord = lp_build_add(coord_bld, coord, offset);
 357          }
 358          lp_build_coord_repeat_npot_linear(bld, coord,
 359                                            length, length_f,
 360                                            &coord0, &weight);
 361          mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
 362                                  PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
 363          coord1 = LLVMBuildAnd(builder,
 364                                lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
 365                                mask, "");
 366       }
 367       break;
 368
 369    case PIPE_TEX_WRAP_CLAMP:
 370       if (bld->static_sampler_state->normalized_coords) {
 371          /* scale coord to length */
 372          coord = lp_build_mul(coord_bld, coord, length_f);
 373       }
 374       if (offset) {
 375          offset = lp_build_int_to_float(coord_bld, offset);
 376          coord = lp_build_add(coord_bld, coord, offset);
 377       }
 378
 379       /* clamp to [0, length] */
 380       coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);
 381
 382       coord = lp_build_sub(coord_bld, coord, half);
 383
 384       /* convert to int, compute lerp weight */
 385       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
 386       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 387       break;
 388
 389    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 390       {
 391          struct lp_build_context abs_coord_bld = bld->coord_bld;
 392          abs_coord_bld.type.sign = FALSE;
 393
 394          if (bld->static_sampler_state->normalized_coords) {
 395             /* mul by tex size */
 396             coord = lp_build_mul(coord_bld, coord, length_f);
 397          }
 398          if (offset) {
 399             offset = lp_build_int_to_float(coord_bld, offset);
 400             coord = lp_build_add(coord_bld, coord, offset);
 401          }
 402
 403          /* clamp to length max */
 404          coord = lp_build_min(coord_bld, coord, length_f);
 405          /* subtract 0.5 */
 406          coord = lp_build_sub(coord_bld, coord, half);
 407          /* clamp to [0, length - 0.5] */
 408          coord = lp_build_max(coord_bld, coord, coord_bld->zero);
 409          /* convert to int, compute lerp weight */
 410          lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
 411          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 412          /* coord1 = min(coord1, length-1) */
 413          coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
 414          break;
 415       }
 416
 417    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 418       if (bld->static_sampler_state->normalized_coords) {
 419          /* scale coord to length */
 420          coord = lp_build_mul(coord_bld, coord, length_f);
 421       }
 422       if (offset) {
 423          offset = lp_build_int_to_float(coord_bld, offset);
 424          coord = lp_build_add(coord_bld, coord, offset);
 425       }
 426       /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */
 427       /* can skip clamp (though might not work for very large coord values */
 428       coord = lp_build_sub(coord_bld, coord, half);
 429       /* convert to int, compute lerp weight */
 430       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
 431       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 432       break;
 433
 434    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 435       /* compute mirror function */
 436       coord = lp_build_coord_mirror(bld, coord);
 437
 438       /* scale coord to length */
 439       coord = lp_build_mul(coord_bld, coord, length_f);
 440       coord = lp_build_sub(coord_bld, coord, half);
 441       if (offset) {
 442          offset = lp_build_int_to_float(coord_bld, offset);
 443          coord = lp_build_add(coord_bld, coord, offset);
 444       }
 445
 446       /* convert to int, compute lerp weight */
 447       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
 448       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 449
 450       /* coord0 = max(coord0, 0) */
 451       coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
 452       /* coord1 = min(coord1, length-1) */
 453       coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
 454       break;
 455
 456    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 457       if (bld->static_sampler_state->normalized_coords) {
 458          /* scale coord to length */
 459          coord = lp_build_mul(coord_bld, coord, length_f);
 460       }
 461       if (offset) {
 462          offset = lp_build_int_to_float(coord_bld, offset);
 463          coord = lp_build_add(coord_bld, coord, offset);
 464       }
 465       coord = lp_build_abs(coord_bld, coord);
 466
 467       /* clamp to [0, length] */
 468       coord = lp_build_min(coord_bld, coord, length_f);
 469
 470       coord = lp_build_sub(coord_bld, coord, half);
 471
 472       /* convert to int, compute lerp weight */
 473       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
 474       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 475       break;
 476
 477    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 478       {
 479          struct lp_build_context abs_coord_bld = bld->coord_bld;
 480          abs_coord_bld.type.sign = FALSE;
 481
 482          if (bld->static_sampler_state->normalized_coords) {
 483             /* scale coord to length */
 484             coord = lp_build_mul(coord_bld, coord, length_f);
 485          }
 486          if (offset) {
 487             offset = lp_build_int_to_float(coord_bld, offset);
 488             coord = lp_build_add(coord_bld, coord, offset);
 489          }
 490          coord = lp_build_abs(coord_bld, coord);
 491
 492          /* clamp to length max */
 493          coord = lp_build_min(coord_bld, coord, length_f);
 494          /* subtract 0.5 */
 495          coord = lp_build_sub(coord_bld, coord, half);
 496          /* clamp to [0, length - 0.5] */
 497          coord = lp_build_max(coord_bld, coord, coord_bld->zero);
 498
 499          /* convert to int, compute lerp weight */
 500          lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
 501          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 502          /* coord1 = min(coord1, length-1) */
 503          coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
 504       }
 505       break;
 506
 507    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 508       {
 509          if (bld->static_sampler_state->normalized_coords) {
 510             /* scale coord to length */
 511             coord = lp_build_mul(coord_bld, coord, length_f);
 512          }
 513          if (offset) {
 514             offset = lp_build_int_to_float(coord_bld, offset);
 515             coord = lp_build_add(coord_bld, coord, offset);
 516          }
 517          coord = lp_build_abs(coord_bld, coord);
 518
 519          /* was: clamp to [-0.5, length + 0.5] then sub 0.5 */
 520          /* skip clamp - always positive, and other side
 521             only potentially matters for very large coords */
 522          coord = lp_build_sub(coord_bld, coord, half);
 523
 524          /* convert to int, compute lerp weight */
 525          lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
 526          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 527       }
 528       break;
 529
 530    default:
 531       assert(0);
 532       coord0 = NULL;
 533       coord1 = NULL;
 534       weight = NULL;
 535    }
 536
 537    *x0_out = coord0;
 538    *x1_out = coord1;
 539    *weight_out = weight;
 540 }
 541
 542
 543 /**
 544  * Build LLVM code for texture wrap mode for nearest filtering.
 545  * \param coord  the incoming texcoord (nominally in [0,1])
 546  * \param length  the texture size along one dimension, as int vector
 547  * \param length_f  the texture size along one dimension, as float vector
 548  * \param offset  texel offset along one dimension (as int vector)
 549  * \param is_pot  if TRUE, length is a power of two
 550  * \param wrap_mode  one of PIPE_TEX_WRAP_x
 551  */
 552 static LLVMValueRef
 553 lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
 554                              LLVMValueRef coord,
 555                              LLVMValueRef length,
 556                              LLVMValueRef length_f,
 557                              LLVMValueRef offset,
 558                              boolean is_pot,
 559                              unsigned wrap_mode)
 560 {
 561    struct lp_build_context *coord_bld = &bld->coord_bld;
 562    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 563    LLVMBuilderRef builder = bld->gallivm->builder;
 564    LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
 565    LLVMValueRef icoord;
 566
 567    switch(wrap_mode) {
 568    case PIPE_TEX_WRAP_REPEAT:
 569       if (is_pot) {
 570          coord = lp_build_mul(coord_bld, coord, length_f);
 571          icoord = lp_build_ifloor(coord_bld, coord);
 572          if (offset) {
 573             icoord = lp_build_add(int_coord_bld, icoord, offset);
 574          }
 575          icoord = LLVMBuildAnd(builder, icoord, length_minus_one, "");
 576       }
 577       else {
 578           if (offset) {
 579              offset = lp_build_int_to_float(coord_bld, offset);
 580              offset = lp_build_div(coord_bld, offset, length_f);
 581              coord = lp_build_add(coord_bld, coord, offset);
 582           }
 583           /* take fraction, unnormalize */
 584           coord = lp_build_fract_safe(coord_bld, coord);
 585           coord = lp_build_mul(coord_bld, coord, length_f);
 586           icoord = lp_build_itrunc(coord_bld, coord);
 587       }
 588       break;
 589
 590    case PIPE_TEX_WRAP_CLAMP:
 591    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 592       if (bld->static_sampler_state->normalized_coords) {
 593          /* scale coord to length */
 594          coord = lp_build_mul(coord_bld, coord, length_f);
 595       }
 596
 597       /* floor */
 598       /* use itrunc instead since we clamp to 0 anyway */
 599       icoord = lp_build_itrunc(coord_bld, coord);
 600       if (offset) {
 601          icoord = lp_build_add(int_coord_bld, icoord, offset);
 602       }
 603
 604       /* clamp to [0, length - 1]. */
 605       icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
 606                               length_minus_one);
 607       break;
 608
 609    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 610       if (bld->static_sampler_state->normalized_coords) {
 611          /* scale coord to length */
 612          coord = lp_build_mul(coord_bld, coord, length_f);
 613       }
 614       /* no clamp necessary, border masking will handle this */
 615       icoord = lp_build_ifloor(coord_bld, coord);
 616       if (offset) {
 617          icoord = lp_build_add(int_coord_bld, icoord, offset);
 618       }
 619       break;
 620
 621    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 622       if (offset) {
 623          offset = lp_build_int_to_float(coord_bld, offset);
 624          offset = lp_build_div(coord_bld, offset, length_f);
 625          coord = lp_build_add(coord_bld, coord, offset);
 626       }
 627       /* compute mirror function */
 628       coord = lp_build_coord_mirror(bld, coord);
 629
 630       /* scale coord to length */
 631       assert(bld->static_sampler_state->normalized_coords);
 632       coord = lp_build_mul(coord_bld, coord, length_f);
 633
 634       /* itrunc == ifloor here */
 635       icoord = lp_build_itrunc(coord_bld, coord);
 636
 637       /* clamp to [0, length - 1] */
 638       icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
 639       break;
 640
 641    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 642    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 643       if (bld->static_sampler_state->normalized_coords) {
 644          /* scale coord to length */
 645          coord = lp_build_mul(coord_bld, coord, length_f);
 646       }
 647       if (offset) {
 648          offset = lp_build_int_to_float(coord_bld, offset);
 649          coord = lp_build_add(coord_bld, coord, offset);
 650       }
 651       coord = lp_build_abs(coord_bld, coord);
 652
 653       /* itrunc == ifloor here */
 654       icoord = lp_build_itrunc(coord_bld, coord);
 655
 656       /* clamp to [0, length - 1] */
 657       icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
 658       break;
 659
 660    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 661       if (bld->static_sampler_state->normalized_coords) {
 662          /* scale coord to length */
 663          coord = lp_build_mul(coord_bld, coord, length_f);
 664       }
 665       if (offset) {
 666          offset = lp_build_int_to_float(coord_bld, offset);
 667          coord = lp_build_add(coord_bld, coord, offset);
 668       }
 669       coord = lp_build_abs(coord_bld, coord);
 670
 671       /* itrunc == ifloor here */
 672       icoord = lp_build_itrunc(coord_bld, coord);
 673       break;
 674
 675    default:
 676       assert(0);
 677       icoord = NULL;
 678    }
 679
 680    return icoord;
 681 }
 682
 683
 684 /**
 685  * Generate code to sample a mipmap level with nearest filtering.
 686  * If sampling a cube texture, r = cube face in [0,5].
 687  */
 688 static void
 689 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
 690                               unsigned sampler_unit,
 691                               LLVMValueRef size,
 692                               LLVMValueRef row_stride_vec,
 693                               LLVMValueRef img_stride_vec,
 694                               LLVMValueRef data_ptr,
 695                               LLVMValueRef mipoffsets,
 696                               LLVMValueRef s,
 697                               LLVMValueRef t,
 698                               LLVMValueRef r,
 699                               const LLVMValueRef *offsets,
 700                               LLVMValueRef colors_out[4])
 701 {
 702    const unsigned dims = bld->dims;
 703    LLVMValueRef width_vec;
 704    LLVMValueRef height_vec;
 705    LLVMValueRef depth_vec;
 706    LLVMValueRef flt_size;
 707    LLVMValueRef flt_width_vec;
 708    LLVMValueRef flt_height_vec;
 709    LLVMValueRef flt_depth_vec;
 710    LLVMValueRef x, y = NULL, z = NULL;
 711
 712    lp_build_extract_image_sizes(bld,
 713                                 &bld->int_size_bld,
 714                                 bld->int_coord_type,
 715                                 size,
 716                                 &width_vec, &height_vec, &depth_vec);
 717
 718    flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
 719
 720    lp_build_extract_image_sizes(bld,
 721                                 &bld->float_size_bld,
 722                                 bld->coord_type,
 723                                 flt_size,
 724                                 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
 725
 726    /*
 727     * Compute integer texcoords.
 728     */
 729    x = lp_build_sample_wrap_nearest(bld, s, width_vec, flt_width_vec, offsets[0],
 730                                     bld->static_texture_state->pot_width,
 731                                     bld->static_sampler_state->wrap_s);
 732    lp_build_name(x, "tex.x.wrapped");
 733
 734    if (dims >= 2) {
 735       y = lp_build_sample_wrap_nearest(bld, t, height_vec, flt_height_vec, offsets[1],
 736                                        bld->static_texture_state->pot_height,
 737                                        bld->static_sampler_state->wrap_t);
 738       lp_build_name(y, "tex.y.wrapped");
 739
 740       if (dims == 3) {
 741          z = lp_build_sample_wrap_nearest(bld, r, depth_vec, flt_depth_vec, offsets[2],
 742                                           bld->static_texture_state->pot_depth,
 743                                           bld->static_sampler_state->wrap_r);
 744          lp_build_name(z, "tex.z.wrapped");
 745       }
 746    }
 747    if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
 748        bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
 749        bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
 750       z = r;
 751       lp_build_name(z, "tex.z.layer");
 752    }
 753
 754    /*
 755     * Get texture colors.
 756     */
 757    lp_build_sample_texel_soa(bld, sampler_unit,
 758                              width_vec, height_vec, depth_vec,
 759                              x, y, z,
 760                              row_stride_vec, img_stride_vec,
 761                              data_ptr, mipoffsets, colors_out);
 762 }
 763
 764
 765 /**
 766  * Generate code to sample a mipmap level with linear filtering.
 767  * If sampling a cube texture, r = cube face in [0,5].
 768  */
 769 static void
 770 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
 771                              unsigned sampler_unit,
 772                              LLVMValueRef size,
 773                              LLVMValueRef row_stride_vec,
 774                              LLVMValueRef img_stride_vec,
 775                              LLVMValueRef data_ptr,
 776                              LLVMValueRef mipoffsets,
 777                              LLVMValueRef s,
 778                              LLVMValueRef t,
 779                              LLVMValueRef r,
 780                              const LLVMValueRef *offsets,
 781                              LLVMValueRef colors_out[4])
 782 {
 783    const unsigned dims = bld->dims;
 784    LLVMValueRef width_vec;
 785    LLVMValueRef height_vec;
 786    LLVMValueRef depth_vec;
 787    LLVMValueRef flt_size;
 788    LLVMValueRef flt_width_vec;
 789    LLVMValueRef flt_height_vec;
 790    LLVMValueRef flt_depth_vec;
 791    LLVMValueRef x0, y0 = NULL, z0 = NULL, x1, y1 = NULL, z1 = NULL;
 792    LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL;
 793    LLVMValueRef neighbors[2][2][4];
 794    int chan;
 795
 796    lp_build_extract_image_sizes(bld,
 797                                 &bld->int_size_bld,
 798                                 bld->int_coord_type,
 799                                 size,
 800                                 &width_vec, &height_vec, &depth_vec);
 801
 802    flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
 803
 804    lp_build_extract_image_sizes(bld,
 805                                 &bld->float_size_bld,
 806                                 bld->coord_type,
 807                                 flt_size,
 808                                 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
 809
 810    /*
 811     * Compute integer texcoords.
 812     */
 813    lp_build_sample_wrap_linear(bld, s, width_vec, flt_width_vec, offsets[0],
 814                                bld->static_texture_state->pot_width,
 815                                bld->static_sampler_state->wrap_s,
 816                                &x0, &x1, &s_fpart);
 817    lp_build_name(x0, "tex.x0.wrapped");
 818    lp_build_name(x1, "tex.x1.wrapped");
 819
 820    if (dims >= 2) {
 821       lp_build_sample_wrap_linear(bld, t, height_vec, flt_height_vec, offsets[1],
 822                                   bld->static_texture_state->pot_height,
 823                                   bld->static_sampler_state->wrap_t,
 824                                   &y0, &y1, &t_fpart);
 825       lp_build_name(y0, "tex.y0.wrapped");
 826       lp_build_name(y1, "tex.y1.wrapped");
 827
 828       if (dims == 3) {
 829          lp_build_sample_wrap_linear(bld, r, depth_vec, flt_depth_vec, offsets[2],
 830                                      bld->static_texture_state->pot_depth,
 831                                      bld->static_sampler_state->wrap_r,
 832                                      &z0, &z1, &r_fpart);
 833          lp_build_name(z0, "tex.z0.wrapped");
 834          lp_build_name(z1, "tex.z1.wrapped");
 835       }
 836    }
 837    if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
 838        bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
 839        bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
 840       z0 = z1 = r;  /* cube face or array layer */
 841       lp_build_name(z0, "tex.z0.layer");
 842       lp_build_name(z1, "tex.z1.layer");
 843    }
 844
 845
 846    /*
 847     * Get texture colors.
 848     */
 849    /* get x0/x1 texels */
 850    lp_build_sample_texel_soa(bld, sampler_unit,
 851                              width_vec, height_vec, depth_vec,
 852                              x0, y0, z0,
 853                              row_stride_vec, img_stride_vec,
 854                              data_ptr, mipoffsets, neighbors[0][0]);
 855    lp_build_sample_texel_soa(bld, sampler_unit,
 856                              width_vec, height_vec, depth_vec,
 857                              x1, y0, z0,
 858                              row_stride_vec, img_stride_vec,
 859                              data_ptr, mipoffsets, neighbors[0][1]);
 860
 861    if (dims == 1) {
 862       /* Interpolate two samples from 1D image to produce one color */
 863       for (chan = 0; chan < 4; chan++) {
 864          colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
 865                                           neighbors[0][0][chan],
 866                                           neighbors[0][1][chan],
 867                                           0);
 868       }
 869    }
 870    else {
 871       /* 2D/3D texture */
 872       LLVMValueRef colors0[4];
 873
 874       /* get x0/x1 texels at y1 */
 875       lp_build_sample_texel_soa(bld, sampler_unit,
 876                                 width_vec, height_vec, depth_vec,
 877                                 x0, y1, z0,
 878                                 row_stride_vec, img_stride_vec,
 879                                 data_ptr, mipoffsets, neighbors[1][0]);
 880       lp_build_sample_texel_soa(bld, sampler_unit,
 881                                 width_vec, height_vec, depth_vec,
 882                                 x1, y1, z0,
 883                                 row_stride_vec, img_stride_vec,
 884                                 data_ptr, mipoffsets, neighbors[1][1]);
 885
 886       /* Bilinear interpolate the four samples from the 2D image / 3D slice */
 887       for (chan = 0; chan < 4; chan++) {
 888          colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
 889                                           s_fpart, t_fpart,
 890                                           neighbors[0][0][chan],
 891                                           neighbors[0][1][chan],
 892                                           neighbors[1][0][chan],
 893                                           neighbors[1][1][chan],
 894                                           0);
 895       }
 896
 897       if (dims == 3) {
 898          LLVMValueRef neighbors1[2][2][4];
 899          LLVMValueRef colors1[4];
 900
 901          /* get x0/x1/y0/y1 texels at z1 */
 902          lp_build_sample_texel_soa(bld, sampler_unit,
 903                                    width_vec, height_vec, depth_vec,
 904                                    x0, y0, z1,
 905                                    row_stride_vec, img_stride_vec,
 906                                    data_ptr, mipoffsets, neighbors1[0][0]);
 907          lp_build_sample_texel_soa(bld, sampler_unit,
 908                                    width_vec, height_vec, depth_vec,
 909                                    x1, y0, z1,
 910                                    row_stride_vec, img_stride_vec,
 911                                    data_ptr, mipoffsets, neighbors1[0][1]);
 912          lp_build_sample_texel_soa(bld, sampler_unit,
 913                                    width_vec, height_vec, depth_vec,
 914                                    x0, y1, z1,
 915                                    row_stride_vec, img_stride_vec,
 916                                    data_ptr, mipoffsets, neighbors1[1][0]);
 917          lp_build_sample_texel_soa(bld, sampler_unit,
 918                                    width_vec, height_vec, depth_vec,
 919                                    x1, y1, z1,
 920                                    row_stride_vec, img_stride_vec,
 921                                    data_ptr, mipoffsets, neighbors1[1][1]);
 922
 923          /* Bilinear interpolate the four samples from the second Z slice */
 924          for (chan = 0; chan < 4; chan++) {
 925             colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
 926                                              s_fpart, t_fpart,
 927                                              neighbors1[0][0][chan],
 928                                              neighbors1[0][1][chan],
 929                                              neighbors1[1][0][chan],
 930                                              neighbors1[1][1][chan],
 931                                              0);
 932          }
 933
 934          /* Linearly interpolate the two samples from the two 3D slices */
 935          for (chan = 0; chan < 4; chan++) {
 936             colors_out[chan] = lp_build_lerp(&bld->texel_bld,
 937                                              r_fpart,
 938                                              colors0[chan], colors1[chan],
 939                                              0);
 940          }
 941       }
 942       else {
 943          /* 2D tex */
 944          for (chan = 0; chan < 4; chan++) {
 945             colors_out[chan] = colors0[chan];
 946          }
 947       }
 948    }
 949 }
 950
 951
 952 /**
 953  * Sample the texture/mipmap using given image filter and mip filter.
 954  * data0_ptr and data1_ptr point to the two mipmap levels to sample
 955  * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
 956  * If we're using nearest miplevel sampling the '1' values will be null/unused.
 957  */
 958 static void
 959 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
 960                        unsigned sampler_unit,
 961                        unsigned img_filter,
 962                        unsigned mip_filter,
 963                        LLVMValueRef s,
 964                        LLVMValueRef t,
 965                        LLVMValueRef r,
 966                        const LLVMValueRef *offsets,
 967                        LLVMValueRef ilevel0,
 968                        LLVMValueRef ilevel1,
 969                        LLVMValueRef lod_fpart,
 970                        LLVMValueRef *colors_out)
 971 {
 972    LLVMBuilderRef builder = bld->gallivm->builder;
 973    LLVMValueRef size0 = NULL;
 974    LLVMValueRef size1 = NULL;
 975    LLVMValueRef row_stride0_vec = NULL;
 976    LLVMValueRef row_stride1_vec = NULL;
 977    LLVMValueRef img_stride0_vec = NULL;
 978    LLVMValueRef img_stride1_vec = NULL;
 979    LLVMValueRef data_ptr0 = NULL;
 980    LLVMValueRef data_ptr1 = NULL;
 981    LLVMValueRef mipoff0 = NULL;
 982    LLVMValueRef mipoff1 = NULL;
 983    LLVMValueRef colors0[4], colors1[4];
 984    unsigned chan;
 985
 986    /* sample the first mipmap level */
 987    lp_build_mipmap_level_sizes(bld, ilevel0,
 988                                &size0,
 989                                &row_stride0_vec, &img_stride0_vec);
 990    if (bld->num_lods == 1) {
 991       data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
 992    }
 993    else {
 994       /* This path should work for num_lods 1 too but slightly less efficient */
 995       data_ptr0 = bld->base_ptr;
 996       mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
 997    }
 998    if (img_filter == PIPE_TEX_FILTER_NEAREST) {
 999       lp_build_sample_image_nearest(bld, sampler_unit,
1000                                     size0,
1001                                     row_stride0_vec, img_stride0_vec,
1002                                     data_ptr0, mipoff0, s, t, r, offsets,
1003                                     colors0);
1004    }
1005    else {
1006       assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1007       lp_build_sample_image_linear(bld, sampler_unit,
1008                                    size0,
1009                                    row_stride0_vec, img_stride0_vec,
1010                                    data_ptr0, mipoff0, s, t, r, offsets,
1011                                    colors0);
1012    }
1013
1014    /* Store the first level's colors in the output variables */
1015    for (chan = 0; chan < 4; chan++) {
1016        LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1017    }
1018
1019    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1020       struct lp_build_if_state if_ctx;
1021       LLVMValueRef need_lerp;
1022
1023       /* need_lerp = lod_fpart > 0 */
1024       if (bld->num_lods == 1) {
1025          need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
1026                                    lod_fpart, bld->levelf_bld.zero,
1027                                    "need_lerp");
1028       }
1029       else {
1030          /*
1031           * We'll do mip filtering if any of the quads (or individual
1032           * pixel in case of per-pixel lod) need it.
1033           * It might be better to split the vectors here and only fetch/filter
1034           * quads which need it.
1035           */
1036          /*
1037           * We unfortunately need to clamp lod_fpart here since we can get
1038           * negative values which would screw up filtering if not all
1039           * lod_fpart values have same sign.
1040           */
1041          lod_fpart = lp_build_max(&bld->levelf_bld, lod_fpart,
1042                                   bld->levelf_bld.zero);
1043          need_lerp = lp_build_compare(bld->gallivm, bld->levelf_bld.type,
1044                                       PIPE_FUNC_GREATER,
1045                                       lod_fpart, bld->levelf_bld.zero);
1046          need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, need_lerp);
1047       }
1048
1049       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1050       {
1051          /* sample the second mipmap level */
1052          lp_build_mipmap_level_sizes(bld, ilevel1,
1053                                      &size1,
1054                                      &row_stride1_vec, &img_stride1_vec);
1055          if (bld->num_lods == 1) {
1056             data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1057          }
1058          else {
1059             data_ptr1 = bld->base_ptr;
1060             mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1061          }
1062          if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1063             lp_build_sample_image_nearest(bld, sampler_unit,
1064                                           size1,
1065                                           row_stride1_vec, img_stride1_vec,
1066                                           data_ptr1, mipoff1, s, t, r, offsets,
1067                                           colors1);
1068          }
1069          else {
1070             lp_build_sample_image_linear(bld, sampler_unit,
1071                                          size1,
1072                                          row_stride1_vec, img_stride1_vec,
1073                                          data_ptr1, mipoff1, s, t, r, offsets,
1074                                          colors1);
1075          }
1076
1077          /* interpolate samples from the two mipmap levels */
1078
1079          if (bld->num_lods != bld->coord_type.length)
1080             lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1081                                                               bld->levelf_bld.type,
1082                                                               bld->texel_bld.type,
1083                                                               lod_fpart);
1084
1085          for (chan = 0; chan < 4; chan++) {
1086             colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1087                                           colors0[chan], colors1[chan],
1088                                           0);
1089             LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1090          }
1091       }
1092       lp_build_endif(&if_ctx);
1093    }
1094 }
1095
1096
1097 /**
1098  * Build (per-coord) layer value.
1099  * Either clamp layer to valid values or fill in optional out_of_bounds
1100  * value and just return value unclamped.
1101  */
1102 static LLVMValueRef
1103 lp_build_layer_coord(struct lp_build_sample_context *bld,
1104                      unsigned texture_unit,
1105                      LLVMValueRef layer,
1106                      LLVMValueRef *out_of_bounds)
1107 {
1108    LLVMValueRef num_layers;
1109    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
1110
1111    num_layers = bld->dynamic_state->depth(bld->dynamic_state,
1112                                           bld->gallivm, texture_unit);
1113
1114    if (out_of_bounds) {
1115       LLVMValueRef out1, out;
1116       num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers);
1117       out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero);
1118       out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers);
1119       *out_of_bounds = lp_build_or(int_coord_bld, out, out1);
1120       return layer;
1121    }
1122    else {
1123       LLVMValueRef maxlayer;
1124       maxlayer = lp_build_sub(&bld->int_bld, num_layers, bld->int_bld.one);
1125       maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer);
1126       return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer);
1127    }
1128 }
1129
1130
1131 /**
1132  * Calculate cube face, lod, mip levels.
1133  */
1134 static void
1135 lp_build_sample_common(struct lp_build_sample_context *bld,
1136                        unsigned texture_index,
1137                        unsigned sampler_index,
1138                        LLVMValueRef *s,
1139                        LLVMValueRef *t,
1140                        LLVMValueRef *r,
1141                        const struct lp_derivatives *derivs, /* optional */
1142                        LLVMValueRef lod_bias, /* optional */
1143                        LLVMValueRef explicit_lod, /* optional */
1144                        LLVMValueRef *lod_ipart,
1145                        LLVMValueRef *lod_fpart,
1146                        LLVMValueRef *ilevel0,
1147                        LLVMValueRef *ilevel1)
1148 {
1149    const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
1150    const unsigned min_filter = bld->static_sampler_state->min_img_filter;
1151    const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
1152    const unsigned target = bld->static_texture_state->target;
1153    LLVMValueRef first_level, cube_rho = NULL;
1154
1155    /*
1156    printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
1157           mip_filter, min_filter, mag_filter);
1158    */
1159
1160    /*
1161     * Choose cube face, recompute texcoords for the chosen face and
1162     * compute rho here too (as it requires transform of derivatives).
1163     */
1164    if (target == PIPE_TEXTURE_CUBE) {
1165       LLVMValueRef face, face_s, face_t;
1166       boolean need_derivs;
1167       need_derivs = ((min_filter != mag_filter ||
1168                       mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
1169                       !bld->static_sampler_state->min_max_lod_equal &&
1170                       !explicit_lod);
1171       lp_build_cube_lookup(bld, *s, *t, *r, derivs, &face, &face_s, &face_t,
1172                            &cube_rho, need_derivs);
1173       *s = face_s; /* vec */
1174       *t = face_t; /* vec */
1175       /* use 'r' to indicate cube face */
1176       *r = face; /* vec */
1177    }
1178    else if (target == PIPE_TEXTURE_1D_ARRAY) {
1179       *r = lp_build_iround(&bld->coord_bld, *t);
1180       *r = lp_build_layer_coord(bld, texture_index, *r, NULL);
1181    }
1182    else if (target == PIPE_TEXTURE_2D_ARRAY) {
1183       *r = lp_build_iround(&bld->coord_bld, *r);
1184       *r = lp_build_layer_coord(bld, texture_index, *r, NULL);
1185    }
1186
1187    /*
1188     * Compute the level of detail (float).
1189     */
1190    if (min_filter != mag_filter ||
1191        mip_filter != PIPE_TEX_MIPFILTER_NONE) {
1192       /* Need to compute lod either to choose mipmap levels or to
1193        * distinguish between minification/magnification with one mipmap level.
1194        */
1195       lp_build_lod_selector(bld, texture_index, sampler_index,
1196                             *s, *t, *r, cube_rho,
1197                             derivs, lod_bias, explicit_lod,
1198                             mip_filter,
1199                             lod_ipart, lod_fpart);
1200    } else {
1201       *lod_ipart = bld->leveli_bld.zero;
1202    }
1203
1204    /*
1205     * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
1206     */
1207    switch (mip_filter) {
1208    default:
1209       assert(0 && "bad mip_filter value in lp_build_sample_soa()");
1210       /* fall-through */
1211    case PIPE_TEX_MIPFILTER_NONE:
1212       /* always use mip level 0 */
1213       if (HAVE_LLVM == 0x0207 && target == PIPE_TEXTURE_CUBE) {
1214          /* XXX this is a work-around for an apparent bug in LLVM 2.7.
1215           * We should be able to set ilevel0 = const(0) but that causes
1216           * bad x86 code to be emitted.
1217           */
1218          assert(*lod_ipart);
1219          lp_build_nearest_mip_level(bld, texture_index, *lod_ipart, ilevel0, NULL);
1220       }
1221       else {
1222          first_level = bld->dynamic_state->first_level(bld->dynamic_state,
1223                                                        bld->gallivm, texture_index);
1224          first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
1225          *ilevel0 = first_level;
1226       }
1227       break;
1228    case PIPE_TEX_MIPFILTER_NEAREST:
1229       assert(*lod_ipart);
1230       lp_build_nearest_mip_level(bld, texture_index, *lod_ipart, ilevel0, NULL);
1231       break;
1232    case PIPE_TEX_MIPFILTER_LINEAR:
1233       assert(*lod_ipart);
1234       assert(*lod_fpart);
1235       lp_build_linear_mip_levels(bld, texture_index,
1236                                  *lod_ipart, lod_fpart,
1237                                  ilevel0, ilevel1);
1238       break;
1239    }
1240 }
1241
1242 /**
1243  * General texture sampling codegen.
1244  * This function handles texture sampling for all texture targets (1D,
1245  * 2D, 3D, cube) and all filtering modes.
1246  */
1247 static void
1248 lp_build_sample_general(struct lp_build_sample_context *bld,
1249                         unsigned sampler_unit,
1250                         LLVMValueRef s,
1251                         LLVMValueRef t,
1252                         LLVMValueRef r,
1253                         const LLVMValueRef *offsets,
1254                         LLVMValueRef lod_ipart,
1255                         LLVMValueRef lod_fpart,
1256                         LLVMValueRef ilevel0,
1257                         LLVMValueRef ilevel1,
1258                         LLVMValueRef *colors_out)
1259 {
1260    struct lp_build_context *int_bld = &bld->int_bld;
1261    LLVMBuilderRef builder = bld->gallivm->builder;
1262    const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
1263    const unsigned min_filter = bld->static_sampler_state->min_img_filter;
1264    const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
1265    LLVMValueRef texels[4];
1266    unsigned chan;
1267
1268    /*
1269     * Get/interpolate texture colors.
1270     */
1271
1272    for (chan = 0; chan < 4; ++chan) {
1273      texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, "");
1274      lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]);
1275    }
1276
1277    if (min_filter == mag_filter) {
1278       /* no need to distinguish between minification and magnification */
1279       lp_build_sample_mipmap(bld, sampler_unit,
1280                              min_filter, mip_filter,
1281                              s, t, r, offsets,
1282                              ilevel0, ilevel1, lod_fpart,
1283                              texels);
1284    }
1285    else {
1286       /* Emit conditional to choose min image filter or mag image filter
1287        * depending on the lod being > 0 or <= 0, respectively.
1288        */
1289       struct lp_build_if_state if_ctx;
1290       LLVMValueRef minify;
1291
1292       /*
1293        * XXX this should to all lods into account, if some are min
1294        * some max probably could hack up the coords/weights in the linear
1295        * path with selects to work for nearest.
1296        * If that's just two quads sitting next to each other it seems
1297        * quite ok to do the same filtering method on both though, at
1298        * least unless we have explicit lod (and who uses different
1299        * min/mag filter with that?)
1300        */
1301       if (bld->num_lods > 1)
1302          lod_ipart = LLVMBuildExtractElement(builder, lod_ipart,
1303                                              lp_build_const_int32(bld->gallivm, 0), "");
1304
1305       /* minify = lod >= 0.0 */
1306       minify = LLVMBuildICmp(builder, LLVMIntSGE,
1307                              lod_ipart, int_bld->zero, "");
1308
1309       lp_build_if(&if_ctx, bld->gallivm, minify);
1310       {
1311          /* Use the minification filter */
1312          lp_build_sample_mipmap(bld, sampler_unit,
1313                                 min_filter, mip_filter,
1314                                 s, t, r, offsets,
1315                                 ilevel0, ilevel1, lod_fpart,
1316                                 texels);
1317       }
1318       lp_build_else(&if_ctx);
1319       {
1320          /* Use the magnification filter */
1321          lp_build_sample_mipmap(bld, sampler_unit,
1322                                 mag_filter, PIPE_TEX_MIPFILTER_NONE,
1323                                 s, t, r, offsets,
1324                                 ilevel0, NULL, NULL,
1325                                 texels);
1326       }
1327       lp_build_endif(&if_ctx);
1328    }
1329
1330    for (chan = 0; chan < 4; ++chan) {
1331      colors_out[chan] = LLVMBuildLoad(builder, texels[chan], "");
1332      lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]);
1333    }
1334 }
1335
1336
1337 /**
1338  * Texel fetch function.
1339  * In contrast to general sampling there is no filtering, no coord minification,
1340  * lod (if any) is always explicit uint, coords are uints (in terms of texel units)
1341  * directly to be applied to the selected mip level (after adding texel offsets).
1342  * This function handles texel fetch for all targets where texel fetch is supported
1343  * (no cube maps, but 1d, 2d, 3d are supported, arrays and buffers should be too).
1344  */
1345 static void
1346 lp_build_fetch_texel(struct lp_build_sample_context *bld,
1347                      unsigned texture_unit,
1348                      const LLVMValueRef *coords,
1349                      LLVMValueRef explicit_lod,
1350                      const LLVMValueRef *offsets,
1351                      LLVMValueRef *colors_out)
1352 {
1353    struct lp_build_context *perquadi_bld = &bld->leveli_bld;
1354    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
1355    unsigned dims = bld->dims, chan;
1356    unsigned target = bld->static_texture_state->target;
1357    boolean out_of_bound_ret_zero = TRUE;
1358    LLVMValueRef size, ilevel;
1359    LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
1360    LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
1361    LLVMValueRef width, height, depth, i, j;
1362    LLVMValueRef offset, out_of_bounds, out1;
1363
1364    out_of_bounds = int_coord_bld->zero;
1365
1366    if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
1367       if (bld->num_lods != int_coord_bld->type.length) {
1368          ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
1369                                             perquadi_bld->type, explicit_lod, 0);
1370       }
1371       else {
1372          ilevel = explicit_lod;
1373       }
1374       lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel,
1375                                  out_of_bound_ret_zero ? &out_of_bounds : NULL);
1376    }
1377    else {
1378       assert(bld->num_lods == 1);
1379       if (bld->static_texture_state->target != PIPE_BUFFER) {
1380          ilevel = bld->dynamic_state->first_level(bld->dynamic_state,
1381                                                   bld->gallivm, texture_unit);
1382       }
1383       else {
1384          ilevel = lp_build_const_int32(bld->gallivm, 0);
1385       }
1386    }
1387    lp_build_mipmap_level_sizes(bld, ilevel,
1388                                &size,
1389                                &row_stride_vec, &img_stride_vec);
1390    lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
1391                                 size, &width, &height, &depth);
1392
1393    if (target == PIPE_TEXTURE_1D_ARRAY ||
1394        target == PIPE_TEXTURE_2D_ARRAY) {
1395       if (target == PIPE_TEXTURE_1D_ARRAY) {
1396          z = y;
1397       }
1398       if (out_of_bound_ret_zero) {
1399          z = lp_build_layer_coord(bld, texture_unit, z, &out1);
1400          out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1401       }
1402       else {
1403          z = lp_build_layer_coord(bld, texture_unit, z, NULL);
1404       }
1405    }
1406
1407    /* This is a lot like border sampling */
1408    if (offsets[0]) {
1409       /*
1410        * coords are really unsigned, offsets are signed, but I don't think
1411        * exceeding 31 bits is possible
1412        */
1413       x = lp_build_add(int_coord_bld, x, offsets[0]);
1414    }
1415    out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
1416    out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1417    out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
1418    out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1419
1420    if (dims >= 2) {
1421       if (offsets[1]) {
1422          y = lp_build_add(int_coord_bld, y, offsets[1]);
1423       }
1424       out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
1425       out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1426       out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
1427       out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1428
1429       if (dims >= 3) {
1430          if (offsets[2]) {
1431             z = lp_build_add(int_coord_bld, z, offsets[2]);
1432          }
1433          out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
1434          out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1435          out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
1436          out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1437       }
1438    }
1439
1440    lp_build_sample_offset(int_coord_bld,
1441                           bld->format_desc,
1442                           x, y, z, row_stride_vec, img_stride_vec,
1443                           &offset, &i, &j);
1444
1445    if (bld->static_texture_state->target != PIPE_BUFFER) {
1446       offset = lp_build_add(int_coord_bld, offset,
1447                             lp_build_get_mip_offsets(bld, ilevel));
1448    }
1449
1450    offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);
1451
1452    lp_build_fetch_rgba_soa(bld->gallivm,
1453                            bld->format_desc,
1454                            bld->texel_type,
1455                            bld->base_ptr, offset,
1456                            i, j,
1457                            colors_out);
1458
1459    if (out_of_bound_ret_zero) {
1460       /*
1461        * Only needed for ARB_robust_buffer_access_behavior and d3d10.
1462        * Could use min/max above instead of out-of-bounds comparisons
1463        * if we don't care about the result returned for out-of-bounds.
1464        */
1465       for (chan = 0; chan < 4; chan++) {
1466          colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
1467                                             bld->texel_bld.zero, colors_out[chan]);
1468       }
1469    }
1470 }
1471
1472
1473 /**
1474  * Do shadow test/comparison.
1475  * \param coords  incoming texcoords
1476  * \param texel  the texel to compare against (use the X channel)
1477  * Ideally this should really be done per-sample.
1478  */
1479 static void
1480 lp_build_sample_compare(struct lp_build_sample_context *bld,
1481                         const LLVMValueRef *coords,
1482                         LLVMValueRef texel[4])
1483 {
1484    struct lp_build_context *texel_bld = &bld->texel_bld;
1485    LLVMBuilderRef builder = bld->gallivm->builder;
1486    LLVMValueRef res, p;
1487    const unsigned chan = 0;
1488    unsigned chan_type;
1489    const struct util_format_description *format_desc;
1490
1491    if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE)
1492       return;
1493
1494    if (bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY ||
1495        bld->static_texture_state->target == PIPE_TEXTURE_CUBE) {
1496       p = coords[3];
1497    }
1498    else {
1499       p = coords[2];
1500    }
1501
1502    /* debug code */
1503    if (0) {
1504       LLVMValueRef indx = lp_build_const_int32(bld->gallivm, 0);
1505       LLVMValueRef coord = LLVMBuildExtractElement(builder, p, indx, "");
1506       LLVMValueRef tex = LLVMBuildExtractElement(builder, texel[chan], indx, "");
1507       lp_build_printf(bld->gallivm, "shadow compare coord %f to texture %f\n",
1508                       coord, tex);
1509    }
1510
1511    /* Clamp p coords to [0,1] for fixed function depth texture format */
1512    format_desc = util_format_description(bld->static_texture_state->format);
1513    /* not entirely sure we couldn't end up with non-valid swizzle here */
1514    chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
1515                   format_desc->channel[format_desc->swizzle[0]].type :
1516                   UTIL_FORMAT_TYPE_FLOAT;
1517    if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
1518       p = lp_build_clamp(&bld->coord_bld, p,
1519                          bld->coord_bld.zero, bld->coord_bld.one);
1520    }
1521
1522    /*
1523     * technically this is not entirely correct for unorm depth as the ref value
1524     * should be converted to the depth format (quantization!) and comparison
1525     * then done in texture format.
1526     */
1527
1528    /* result = (p FUNC texel) ? 1 : 0 */
1529    /*
1530     * honor d3d10 floating point rules here, which state that comparisons
1531     * are ordered except NOT_EQUAL which is unordered.
1532     */
1533    if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) {
1534       res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func,
1535                                  p, texel[chan]);
1536    }
1537    else {
1538       res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func,
1539                          p, texel[chan]);
1540    }
1541    res = lp_build_select(texel_bld, res, texel_bld->one, texel_bld->zero);
1542
1543    /*
1544     * returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE.
1545     * This should be ok because sampler swizzle is applied on top of it.
1546     */
1547    texel[0] =
1548    texel[1] =
1549    texel[2] = res;
1550    texel[3] = texel_bld->one;
1551 }
1552
1553
1554 /**
1555  * Just set texels to white instead of actually sampling the texture.
1556  * For debugging.
1557  */
1558 void
1559 lp_build_sample_nop(struct gallivm_state *gallivm,
1560                     struct lp_type type,
1561                     const LLVMValueRef *coords,
1562                     LLVMValueRef texel_out[4])
1563 {
1564    LLVMValueRef one = lp_build_one(gallivm, type);
1565    unsigned chan;
1566
1567    for (chan = 0; chan < 4; chan++) {
1568       texel_out[chan] = one;
1569    }
1570 }
1571
1572
1573 /**
1574  * Build texture sampling code.
1575  * 'texel' will return a vector of four LLVMValueRefs corresponding to
1576  * R, G, B, A.
1577  * \param type  vector float type to use for coords, etc.
1578  * \param is_fetch  if this is a texel fetch instruction.
1579  * \param derivs  partial derivatives of (s,t,r,q) with respect to x and y
1580  */
1581 void
1582 lp_build_sample_soa(struct gallivm_state *gallivm,
1583                     const struct lp_static_texture_state *static_texture_state,
1584                     const struct lp_static_sampler_state *static_sampler_state,
1585                     struct lp_sampler_dynamic_state *dynamic_state,
1586                     struct lp_type type,
1587                     boolean is_fetch,
1588                     unsigned texture_index,
1589                     unsigned sampler_index,
1590                     const LLVMValueRef *coords,
1591                     const LLVMValueRef *offsets,
1592                     const struct lp_derivatives *derivs, /* optional */
1593                     LLVMValueRef lod_bias, /* optional */
1594                     LLVMValueRef explicit_lod, /* optional */
1595                     boolean scalar_lod,
1596                     LLVMValueRef texel_out[4])
1597 {
1598    unsigned dims = texture_dims(static_texture_state->target);
1599    unsigned num_quads = type.length / 4;
1600    unsigned mip_filter;
1601    struct lp_build_sample_context bld;
1602    struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
1603    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
1604    LLVMBuilderRef builder = gallivm->builder;
1605    LLVMValueRef tex_width;
1606    LLVMValueRef s;
1607    LLVMValueRef t;
1608    LLVMValueRef r;
1609
1610    if (0) {
1611       enum pipe_format fmt = static_texture_state->format;
1612       debug_printf("Sample from %s\n", util_format_name(fmt));
1613    }
1614
1615    assert(type.floating);
1616
1617    /* Setup our build context */
1618    memset(&bld, 0, sizeof bld);
1619    bld.gallivm = gallivm;
1620    bld.static_sampler_state = &derived_sampler_state;
1621    bld.static_texture_state = static_texture_state;
1622    bld.dynamic_state = dynamic_state;
1623    bld.format_desc = util_format_description(static_texture_state->format);
1624    bld.dims = dims;
1625
1626    bld.vector_width = lp_type_width(type);
1627
1628    bld.float_type = lp_type_float(32);
1629    bld.int_type = lp_type_int(32);
1630    bld.coord_type = type;
1631    bld.int_coord_type = lp_int_type(type);
1632    bld.float_size_in_type = lp_type_float(32);
1633    bld.float_size_in_type.length = dims > 1 ? 4 : 1;
1634    bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
1635    bld.texel_type = type;
1636
1637    /* always using the first channel hopefully should be safe,
1638     * if not things WILL break in other places anyway.
1639     */
1640    if (bld.format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
1641        bld.format_desc->channel[0].pure_integer) {
1642       if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
1643          bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
1644       }
1645       else if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
1646          bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length);
1647       }
1648    }
1649    else if (util_format_has_stencil(bld.format_desc) &&
1650        !util_format_has_depth(bld.format_desc)) {
1651       /* for stencil only formats, sample stencil (uint) */
1652       bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
1653    }
1654
1655    if (!static_texture_state->level_zero_only) {
1656       derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
1657    } else {
1658       derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
1659    }
1660    mip_filter = derived_sampler_state.min_mip_filter;
1661
1662    if (0) {
1663       debug_printf("  .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter);
1664    }
1665
1666    /*
1667     * This is all a bit complicated different paths are chosen for performance
1668     * reasons.
1669     * Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for
1670     * everything (the last two options are equivalent for 4-wide case).
1671     * If there's per-quad lod but we split to 4-wide so we can use AoS, per-quad
1672     * lod is calculated then the lod value extracted afterwards so making this
1673     * case basically the same as far as lod handling is concerned for the
1674     * further sample/filter code as the 1 lod for everything case.
1675     * Different lod handling mostly shows up when building mipmap sizes
1676     * (lp_build_mipmap_level_sizes() and friends) and also in filtering
1677     * (getting the fractional part of the lod to the right texels).
1678     */
1679
1680    /*
1681     * There are other situations where at least the multiple int lods could be
1682     * avoided like min and max lod being equal.
1683     */
1684    if (explicit_lod && !scalar_lod &&
1685        ((is_fetch && bld.static_texture_state->target != PIPE_BUFFER) ||
1686         (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
1687       bld.num_lods = type.length;
1688    /* TODO: for true scalar_lod should only use 1 lod value */
1689    else if ((is_fetch && explicit_lod && bld.static_texture_state->target != PIPE_BUFFER ) ||
1690             (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
1691       bld.num_lods = num_quads;
1692    }
1693    else {
1694       bld.num_lods = 1;
1695    }
1696
1697    bld.levelf_type = type;
1698    /* we want native vector size to be able to use our intrinsics */
1699    if (bld.num_lods != type.length) {
1700       bld.levelf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
1701    }
1702    bld.leveli_type = lp_int_type(bld.levelf_type);
1703    bld.float_size_type = bld.float_size_in_type;
1704    /* Note: size vectors may not be native. They contain minified w/h/d/_ values,
1705     * with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to 8x4f32 */
1706    if (bld.num_lods > 1) {
1707       bld.float_size_type.length = bld.num_lods == type.length ?
1708                                       bld.num_lods * bld.float_size_in_type.length :
1709                                       type.length;
1710    }
1711    bld.int_size_type = lp_int_type(bld.float_size_type);
1712
1713    lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
1714    lp_build_context_init(&bld.float_vec_bld, gallivm, type);
1715    lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
1716    lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
1717    lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
1718    lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
1719    lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
1720    lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
1721    lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
1722    lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
1723    lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
1724    lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
1725
1726    /* Get the dynamic state */
1727    tex_width = dynamic_state->width(dynamic_state, gallivm, texture_index);
1728    bld.row_stride_array = dynamic_state->row_stride(dynamic_state, gallivm, texture_index);
1729    bld.img_stride_array = dynamic_state->img_stride(dynamic_state, gallivm, texture_index);
1730    bld.base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm, texture_index);
1731    bld.mip_offsets = dynamic_state->mip_offsets(dynamic_state, gallivm, texture_index);
1732    /* Note that mip_offsets is an array[level] of offsets to texture images */
1733
1734    s = coords[0];
1735    t = coords[1];
1736    r = coords[2];
1737
1738    /* width, height, depth as single int vector */
1739    if (dims <= 1) {
1740       bld.int_size = tex_width;
1741    }
1742    else {
1743       bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
1744                                             tex_width, LLVMConstInt(i32t, 0, 0), "");
1745       if (dims >= 2) {
1746          LLVMValueRef tex_height =
1747             dynamic_state->height(dynamic_state, gallivm, texture_index);
1748          bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
1749                                                tex_height, LLVMConstInt(i32t, 1, 0), "");
1750          if (dims >= 3) {
1751             LLVMValueRef tex_depth =
1752                dynamic_state->depth(dynamic_state, gallivm, texture_index);
1753             bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
1754                                                   tex_depth, LLVMConstInt(i32t, 2, 0), "");
1755          }
1756       }
1757    }
1758
1759    if (0) {
1760       /* For debug: no-op texture sampling */
1761       lp_build_sample_nop(gallivm,
1762                           bld.texel_type,
1763                           coords,
1764                           texel_out);
1765    }
1766
1767    else if (is_fetch) {
1768       lp_build_fetch_texel(&bld, texture_index, coords,
1769                            explicit_lod, offsets,
1770                            texel_out);
1771    }
1772
1773    else {
1774       LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
1775       LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
1776       boolean use_aos = util_format_fits_8unorm(bld.format_desc) &&
1777                         lp_is_simple_wrap_mode(static_sampler_state->wrap_s) &&
1778                         lp_is_simple_wrap_mode(static_sampler_state->wrap_t);
1779
1780       if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
1781           !use_aos && util_format_fits_8unorm(bld.format_desc)) {
1782          debug_printf("%s: using floating point linear filtering for %s\n",
1783                       __FUNCTION__, bld.format_desc->short_name);
1784          debug_printf("  min_img %d  mag_img %d  mip %d  wraps %d  wrapt %d\n",
1785                       static_sampler_state->min_img_filter,
1786                       static_sampler_state->mag_img_filter,
1787                       static_sampler_state->min_mip_filter,
1788                       static_sampler_state->wrap_s,
1789                       static_sampler_state->wrap_t);
1790       }
1791
1792       lp_build_sample_common(&bld, texture_index, sampler_index,
1793                              &s, &t, &r,
1794                              derivs, lod_bias, explicit_lod,
1795                              &lod_ipart, &lod_fpart,
1796                              &ilevel0, &ilevel1);
1797
1798       /*
1799        * we only try 8-wide sampling with soa as it appears to
1800        * be a loss with aos with AVX (but it should work).
1801        * (It should be faster if we'd support avx2)
1802        */
1803       if (num_quads == 1 || !use_aos) {
1804
1805          if (num_quads > 1) {
1806             if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
1807                LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
1808                /*
1809                 * These parameters are the same for all quads,
1810                 * could probably simplify.
1811                 */
1812                lod_ipart = LLVMBuildExtractElement(builder, lod_ipart, index0, "");
1813                ilevel0 = LLVMBuildExtractElement(builder, ilevel0, index0, "");
1814             }
1815          }
1816          if (use_aos) {
1817             /* do sampling/filtering with fixed pt arithmetic */
1818             lp_build_sample_aos(&bld, sampler_index,
1819                                 s, t, r, offsets,
1820                                 lod_ipart, lod_fpart,
1821                                 ilevel0, ilevel1,
1822                                 texel_out);
1823          }
1824
1825          else {
1826             lp_build_sample_general(&bld, sampler_index,
1827                                     s, t, r, offsets,
1828                                     lod_ipart, lod_fpart,
1829                                     ilevel0, ilevel1,
1830                                     texel_out);
1831          }
1832       }
1833       else {
1834          unsigned j;
1835          struct lp_build_sample_context bld4;
1836          struct lp_type type4 = type;
1837          unsigned i;
1838          LLVMValueRef texelout4[4];
1839          LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
1840
1841          type4.length = 4;
1842
1843          /* Setup our build context */
1844          memset(&bld4, 0, sizeof bld4);
1845          bld4.gallivm = bld.gallivm;
1846          bld4.static_texture_state = bld.static_texture_state;
1847          bld4.static_sampler_state = bld.static_sampler_state;
1848          bld4.dynamic_state = bld.dynamic_state;
1849          bld4.format_desc = bld.format_desc;
1850          bld4.dims = bld.dims;
1851          bld4.row_stride_array = bld.row_stride_array;
1852          bld4.img_stride_array = bld.img_stride_array;
1853          bld4.base_ptr = bld.base_ptr;
1854          bld4.mip_offsets = bld.mip_offsets;
1855          bld4.int_size = bld.int_size;
1856
1857          bld4.vector_width = lp_type_width(type4);
1858
1859          bld4.float_type = lp_type_float(32);
1860          bld4.int_type = lp_type_int(32);
1861          bld4.coord_type = type4;
1862          bld4.int_coord_type = lp_int_type(type4);
1863          bld4.float_size_in_type = lp_type_float(32);
1864          bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
1865          bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
1866          bld4.texel_type = bld.texel_type;
1867          bld4.texel_type.length = 4;
1868          bld4.levelf_type = type4;
1869          /* we want native vector size to be able to use our intrinsics */
1870          bld4.levelf_type.length = 1;
1871          bld4.leveli_type = lp_int_type(bld4.levelf_type);
1872
1873          if (explicit_lod && !scalar_lod &&
1874              ((is_fetch && bld.static_texture_state->target != PIPE_BUFFER) ||
1875               (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
1876             bld4.num_lods = type4.length;
1877          else
1878             bld4.num_lods = 1;
1879
1880          bld4.levelf_type = type4;
1881          /* we want native vector size to be able to use our intrinsics */
1882          if (bld4.num_lods != type4.length) {
1883             bld4.levelf_type.length = 1;
1884          }
1885          bld4.leveli_type = lp_int_type(bld4.levelf_type);
1886          bld4.float_size_type = bld4.float_size_in_type;
1887          if (bld4.num_lods > 1) {
1888             bld4.float_size_type.length = bld4.num_lods == type4.length ?
1889                                             bld4.num_lods * bld4.float_size_in_type.length :
1890                                             type4.length;
1891          }
1892          bld4.int_size_type = lp_int_type(bld4.float_size_type);
1893
1894          lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
1895          lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
1896          lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
1897          lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
1898          lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
1899          lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
1900          lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
1901          lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
1902          lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
1903          lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
1904          lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
1905          lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
1906
1907          for (i = 0; i < num_quads; i++) {
1908             LLVMValueRef s4, t4, r4;
1909             LLVMValueRef lod_ipart4, lod_fpart4 = NULL;
1910             LLVMValueRef ilevel04, ilevel14 = NULL;
1911             LLVMValueRef offsets4[4] = { NULL };
1912             unsigned num_lods = bld4.num_lods;
1913
1914             s4 = lp_build_extract_range(gallivm, s, 4*i, 4);
1915             t4 = lp_build_extract_range(gallivm, t, 4*i, 4);
1916             r4 = lp_build_extract_range(gallivm, r, 4*i, 4);
1917
1918             if (offsets[0]) {
1919                offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4);
1920                if (dims > 1) {
1921                   offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4);
1922                   if (dims > 2) {
1923                      offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4);
1924                   }
1925                }
1926             }
1927             lod_ipart4 = lp_build_extract_range(gallivm, lod_ipart, num_lods * i, num_lods);
1928             ilevel04 = lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
1929             if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1930                ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
1931                lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
1932             }
1933
1934             if (use_aos) {
1935                /* do sampling/filtering with fixed pt arithmetic */
1936                lp_build_sample_aos(&bld4, sampler_index,
1937                                    s4, t4, r4, offsets4,
1938                                    lod_ipart4, lod_fpart4,
1939                                    ilevel04, ilevel14,
1940                                    texelout4);
1941             }
1942
1943             else {
1944                lp_build_sample_general(&bld4, sampler_index,
1945                                        s4, t4, r4, offsets4,
1946                                        lod_ipart4, lod_fpart4,
1947                                        ilevel04, ilevel14,
1948                                        texelout4);
1949             }
1950             for (j = 0; j < 4; j++) {
1951                texelouttmp[j][i] = texelout4[j];
1952             }
1953          }
1954
1955          for (j = 0; j < 4; j++) {
1956             texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
1957          }
1958       }
1959
1960       lp_build_sample_compare(&bld, coords, texel_out);
1961    }
1962
1963    if (static_texture_state->target != PIPE_BUFFER) {
1964       apply_sampler_swizzle(&bld, texel_out);
1965    }
1966
1967    /*
1968     * texel type can be a (32bit) int/uint (for pure int formats only),
1969     * however we are expected to always return floats (storage is untyped).
1970     */
1971    if (!bld.texel_type.floating) {
1972       unsigned chan;
1973       for (chan = 0; chan < 4; chan++) {
1974          texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan],
1975                                             lp_build_vec_type(gallivm, type), "");
1976       }
1977    }
1978 }
1979
1980 void
1981 lp_build_size_query_soa(struct gallivm_state *gallivm,
1982                         const struct lp_static_texture_state *static_state,
1983                         struct lp_sampler_dynamic_state *dynamic_state,
1984                         struct lp_type int_type,
1985                         unsigned texture_unit,
1986                         unsigned target,
1987                         boolean is_sviewinfo,
1988                         boolean scalar_lod,
1989                         LLVMValueRef explicit_lod,
1990                         LLVMValueRef *sizes_out)
1991 {
1992    LLVMValueRef lod, level, size;
1993    LLVMValueRef first_level = NULL;
1994    int dims, i;
1995    boolean has_array;
1996    unsigned num_lods = 1;
1997    struct lp_build_context bld_int_vec4;
1998
1999    /*
2000     * Do some sanity verification about bound texture and shader dcl target.
2001     * Not entirely sure what's possible but assume array/non-array
2002     * always compatible (probably not ok for OpenGL but d3d10 has no
2003     * distinction of arrays at the resource level).
2004     * Everything else looks bogus (though not entirely sure about rect/2d).
2005     * Currently disabled because it causes assertion failures if there's
2006     * nothing bound (or rather a dummy texture, not that this case would
2007     * return the right values).
2008     */
2009    if (0 && static_state->target != target) {
2010       if (static_state->target == PIPE_TEXTURE_1D)
2011          assert(target == PIPE_TEXTURE_1D_ARRAY);
2012       else if (static_state->target == PIPE_TEXTURE_1D_ARRAY)
2013          assert(target == PIPE_TEXTURE_1D);
2014       else if (static_state->target == PIPE_TEXTURE_2D)
2015          assert(target == PIPE_TEXTURE_2D_ARRAY);
2016       else if (static_state->target == PIPE_TEXTURE_2D_ARRAY)
2017          assert(target == PIPE_TEXTURE_2D);
2018       else if (static_state->target == PIPE_TEXTURE_CUBE)
2019          assert(target == PIPE_TEXTURE_CUBE_ARRAY);
2020       else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY)
2021          assert(target == PIPE_TEXTURE_CUBE);
2022       else
2023          assert(0);
2024    }
2025
2026    dims = texture_dims(target);
2027
2028    switch (target) {
2029    case PIPE_TEXTURE_1D_ARRAY:
2030    case PIPE_TEXTURE_2D_ARRAY:
2031       has_array = TRUE;
2032       break;
2033    default:
2034       has_array = FALSE;
2035       break;
2036    }
2037
2038    assert(!int_type.floating);
2039
2040    lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));
2041
2042    if (explicit_lod) {
2043       /* FIXME: this needs to honor per-element lod */
2044       lod = LLVMBuildExtractElement(gallivm->builder, explicit_lod, lp_build_const_int32(gallivm, 0), "");
2045       first_level = dynamic_state->first_level(dynamic_state, gallivm, texture_unit);
2046       level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level");
2047       lod = lp_build_broadcast_scalar(&bld_int_vec4, level);
2048    } else {
2049       lod = bld_int_vec4.zero;
2050    }
2051
2052    size = bld_int_vec4.undef;
2053
2054    size = LLVMBuildInsertElement(gallivm->builder, size,
2055                                  dynamic_state->width(dynamic_state, gallivm, texture_unit),
2056                                  lp_build_const_int32(gallivm, 0), "");
2057
2058    if (dims >= 2) {
2059       size = LLVMBuildInsertElement(gallivm->builder, size,
2060                                     dynamic_state->height(dynamic_state, gallivm, texture_unit),
2061                                     lp_build_const_int32(gallivm, 1), "");
2062    }
2063
2064    if (dims >= 3) {
2065       size = LLVMBuildInsertElement(gallivm->builder, size,
2066                                     dynamic_state->depth(dynamic_state, gallivm, texture_unit),
2067                                     lp_build_const_int32(gallivm, 2), "");
2068    }
2069
2070    size = lp_build_minify(&bld_int_vec4, size, lod);
2071
2072    if (has_array)
2073       size = LLVMBuildInsertElement(gallivm->builder, size,
2074                                     dynamic_state->depth(dynamic_state, gallivm, texture_unit),
2075                                     lp_build_const_int32(gallivm, dims), "");
2076
2077    /*
2078     * d3d10 requires zero for x/y/z values (but not w, i.e. mip levels)
2079     * if level is out of bounds (note this can't cover unbound texture
2080     * here, which also requires returning zero).
2081     */
2082    if (explicit_lod && is_sviewinfo) {
2083       LLVMValueRef last_level, out, out1;
2084       struct lp_build_context leveli_bld;
2085
2086       /* everything is scalar for now */
2087       lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32));
2088       last_level = dynamic_state->last_level(dynamic_state, gallivm, texture_unit);
2089
2090       out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level);
2091       out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level);
2092       out = lp_build_or(&leveli_bld, out, out1);
2093       if (num_lods == 1) {
2094          out = lp_build_broadcast_scalar(&bld_int_vec4, out);
2095       }
2096       else {
2097          /* TODO */
2098          assert(0);
2099       }
2100       size = lp_build_andnot(&bld_int_vec4, size, out);
2101    }
2102    for (i = 0; i < dims + (has_array ? 1 : 0); i++) {
2103       sizes_out[i] = lp_build_extract_broadcast(gallivm, bld_int_vec4.type, int_type,
2104                                                 size,
2105                                                 lp_build_const_int32(gallivm, i));
2106    }
2107    if (is_sviewinfo) {
2108       for (; i < 4; i++) {
2109          sizes_out[i] = lp_build_const_vec(gallivm, int_type, 0.0);
2110       }
2111    }
2112
2113    /*
2114     * if there's no explicit_lod (buffers, rects) queries requiring nr of
2115     * mips would be illegal.
2116     */
2117    if (is_sviewinfo && explicit_lod) {
2118       struct lp_build_context bld_int_scalar;
2119       LLVMValueRef num_levels;
2120       lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32));
2121
2122       if (static_state->level_zero_only) {
2123          num_levels = bld_int_scalar.one;
2124       }
2125       else {
2126          LLVMValueRef last_level;
2127
2128          last_level = dynamic_state->last_level(dynamic_state, gallivm, texture_unit);
2129          num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level);
2130          num_levels = lp_build_add(&bld_int_scalar, num_levels, bld_int_scalar.one);
2131       }
2132       sizes_out[3] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, int_type),
2133                                         num_levels);
2134    }
2135 }