src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * Texture sampling -- SoA.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  */
  34
  35 #include "pipe/p_defines.h"
  36 #include "pipe/p_state.h"
  37 #include "util/u_debug.h"
  38 #include "util/u_dump.h"
  39 #include "util/u_memory.h"
  40 #include "util/u_math.h"
  41 #include "util/u_format.h"
  42 #include "util/u_cpu_detect.h"
  43 #include "lp_bld_debug.h"
  44 #include "lp_bld_type.h"
  45 #include "lp_bld_const.h"
  46 #include "lp_bld_conv.h"
  47 #include "lp_bld_arit.h"
  48 #include "lp_bld_logic.h"
  49 #include "lp_bld_swizzle.h"
  50 #include "lp_bld_pack.h"
  51 #include "lp_bld_format.h"
  52 #include "lp_bld_sample.h"
  53
  54
  55 /**
  56  * Keep all information for sampling code generation in a single place.
  57  */
  58 struct lp_build_sample_context
  59 {
  60    LLVMBuilderRef builder;   /**< builder the sampling code is emitted through */
  61
  62    const struct lp_sampler_static_state *static_state;   /**< target, wrap modes, border color, normalized_coords */
  63
  64    struct lp_sampler_dynamic_state *dynamic_state;
  65
  66    const struct util_format_description *format_desc;   /**< texture format; block size/bits are read when fetching texels */
  67
  68    /** regular scalar float type */
  69    struct lp_type float_type;
  70    struct lp_build_context float_bld;
  71
  72    /** regular scalar int type */
  73    struct lp_type int_type;
  74    struct lp_build_context int_bld;
  75
  76    /** Incoming coordinates type and build context */
  77    struct lp_type coord_type;
  78    struct lp_build_context coord_bld;
  79
  80    /** Unsigned integer coordinates */
  81    struct lp_type uint_coord_type;
  82    struct lp_build_context uint_coord_bld;
  83
  84    /** Signed integer coordinates */
  85    struct lp_type int_coord_type;
  86    struct lp_build_context int_coord_bld;
  87
  88    /** Output texels type and build context */
  89    struct lp_type texel_type;
  90    struct lp_build_context texel_bld;
  91 };
  92
  93
  94 /**
  95  * Does the given texture wrap mode allow sampling the texture border color?
  96  * XXX maybe move this into gallium util code.
  97  */
  98 static boolean
  99 wrap_mode_uses_border_color(unsigned mode)
 100 {
 101    switch (mode) {
 102    case PIPE_TEX_WRAP_REPEAT:
 103    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 104    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 105    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 106       return FALSE;
 107    case PIPE_TEX_WRAP_CLAMP:
 108    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 109    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 110    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 111       return TRUE;
 112    default:
 113       assert(0 && "unexpected wrap mode");
 114       return FALSE;
 115    }
 116 }
 117
 118
 119 static LLVMValueRef
 120 lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
 121                           LLVMValueRef data_array, LLVMValueRef level)
 122 {
 123    LLVMValueRef indexes[2], data_ptr;
 124    indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
 125    indexes[1] = level;
 126    data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
 127    data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
 128    return data_ptr;
 129 }
 130
 131
 132 static LLVMValueRef
 133 lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
 134                                 LLVMValueRef data_array, int level)
 135 {
 136    LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
 137    return lp_build_get_mipmap_level(bld, data_array, lvl);
 138 }
 139
 140
 141 /**
 142  * Dereference stride_array[mipmap_level] array to get a stride.
 143  * Return stride as a vector.
 144  */
 145 static LLVMValueRef
 146 lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
 147                               LLVMValueRef stride_array, LLVMValueRef level)
 148 {
 149    LLVMValueRef indexes[2], stride;
 150    indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
 151    indexes[1] = level;
 152    stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, "");
 153    stride = LLVMBuildLoad(bld->builder, stride, "");
 154    stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
 155    return stride;
 156 }
 157
 158
 159 /** Dereference stride_array[0] array to get a stride (as vector). */
 160 static LLVMValueRef
 161 lp_build_get_const_level_stride_vec(struct lp_build_sample_context *bld,
 162                                     LLVMValueRef stride_array, int level)
 163 {
 164    LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
 165    return lp_build_get_level_stride_vec(bld, stride_array, lvl);
 166 }
 167
 168
 169 static int
 170 texture_dims(enum pipe_texture_target tex)
 171 {
 172    switch (tex) {
 173    case PIPE_TEXTURE_1D:
 174       return 1;
 175    case PIPE_TEXTURE_2D:
 176    case PIPE_TEXTURE_CUBE:
 177       return 2;
 178    case PIPE_TEXTURE_3D:
 179       return 3;
 180    default:
 181       assert(0 && "bad texture target in texture_dims()");
 182       return 2;
 183    }
 184 }
 185
 186
 187
 188 /**
 189  * Generate code to fetch a texel from a texture at int coords (x, y, z).
 190  * The computation depends on whether the texture is 1D, 2D or 3D.
 191  * The result, texel, will be:
 192  *   texel[0] = red values
 193  *   texel[1] = green values
 194  *   texel[2] = blue values
 195  *   texel[3] = alpha values
 196  */
 197 static void
 198 lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
 199                           LLVMValueRef width,
 200                           LLVMValueRef height,
 201                           LLVMValueRef depth,
 202                           LLVMValueRef x,
 203                           LLVMValueRef y,
 204                           LLVMValueRef z,
 205                           LLVMValueRef y_stride,
 206                           LLVMValueRef z_stride,
 207                           LLVMValueRef data_ptr,
 208                           LLVMValueRef *texel)
 209 {
 210    const int dims = texture_dims(bld->static_state->target);
 211    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 212    LLVMValueRef offset;
 213    LLVMValueRef packed;
 214    LLVMValueRef use_border = NULL;
 215
 216    /* use_border = x < 0 || x >= width || y < 0 || y >= height */
 217    if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) {
 218       LLVMValueRef b1, b2;
 219       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
 220       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
 221       use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
 222    }
 223
 224    if (dims >= 2 && wrap_mode_uses_border_color(bld->static_state->wrap_t)) {
 225       LLVMValueRef b1, b2;
 226       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
 227       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
 228       if (use_border) {
 229          use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
 230          use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
 231       }
 232       else {
 233          use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
 234       }
 235    }
 236
 237    if (dims == 3 && wrap_mode_uses_border_color(bld->static_state->wrap_r)) {
 238       LLVMValueRef b1, b2;
 239       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
 240       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
 241       if (use_border) {
 242          use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
 243          use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
 244       }
 245       else {
 246          use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
 247       }
 248    }
 249
 250    /*
 251     * Note: if we find an app which frequently samples the texture border
 252     * we might want to implement a true conditional here to avoid sampling
 253     * the texture whenever possible (since that's quite a bit of code).
 254     * Ex:
 255     *   if (use_border) {
 256     *      texel = border_color;
 257     *   }
 258     *   else {
 259     *      texel = sample_texture(coord);
 260     *   }
 261     * As it is now, we always sample the texture, then selectively replace
 262     * the texel color results with the border color.
 263     */
 264
 265    /* convert x,y,z coords to linear offset from start of texture, in bytes */
 266    offset = lp_build_sample_offset(&bld->uint_coord_bld,
 267                                    bld->format_desc,
 268                                    x, y, z, y_stride, z_stride);
 269
 270    assert(bld->format_desc->block.width == 1);
 271    assert(bld->format_desc->block.height == 1);
 272    assert(bld->format_desc->block.bits <= bld->texel_type.width);
 273
 274    /* gather the texels from the texture */
 275    packed = lp_build_gather(bld->builder,
 276                             bld->texel_type.length,
 277                             bld->format_desc->block.bits,
 278                             bld->texel_type.width,
 279                             data_ptr, offset);
 280
 281    texel[0] = texel[1] = texel[2] = texel[3] = NULL;
 282
 283    /* convert texels to float rgba */
 284    lp_build_unpack_rgba_soa(bld->builder,
 285                             bld->format_desc,
 286                             bld->texel_type,
 287                             packed, texel);
 288
 289    if (use_border) {
 290       /* select texel color or border color depending on use_border */
 291       int chan;
 292       for (chan = 0; chan < 4; chan++) {
 293          LLVMValueRef border_chan =
 294             lp_build_const_scalar(bld->texel_type,
 295                                   bld->static_state->border_color[chan]);
 296          texel[chan] = lp_build_select(&bld->texel_bld, use_border,
 297                                        border_chan, texel[chan]);
 298       }
 299    }
 300 }
 301
 302
 303 static LLVMValueRef
 304 lp_build_sample_packed(struct lp_build_sample_context *bld,
 305                        LLVMValueRef x,
 306                        LLVMValueRef y,
 307                        LLVMValueRef y_stride,
 308                        LLVMValueRef data_array)
 309 {
 310    LLVMValueRef offset;
 311    LLVMValueRef data_ptr;
 312
 313    offset = lp_build_sample_offset(&bld->uint_coord_bld,
 314                                    bld->format_desc,
 315                                    x, y, NULL, y_stride, NULL);
 316
 317    assert(bld->format_desc->block.width == 1);
 318    assert(bld->format_desc->block.height == 1);
 319    assert(bld->format_desc->block.bits <= bld->texel_type.width);
 320
 321    /* get pointer to mipmap level 0 data */
 322    data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
 323
 324    return lp_build_gather(bld->builder,
 325                           bld->texel_type.length,
 326                           bld->format_desc->block.bits,
 327                           bld->texel_type.width,
 328                           data_ptr, offset);
 329 }
 330
 331
 332 /**
 333  * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
 334  */
 335 static LLVMValueRef
 336 lp_build_coord_mirror(struct lp_build_sample_context *bld,
 337                       LLVMValueRef coord)
 338 {
 339    struct lp_build_context *coord_bld = &bld->coord_bld;
 340    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 341    LLVMValueRef fract, flr, isOdd;
 342
 343    /* fract = coord - floor(coord) */
 344    fract = lp_build_sub(coord_bld, coord, lp_build_floor(coord_bld, coord));
 345
 346    /* flr = ifloor(coord); */
 347    flr = lp_build_ifloor(coord_bld, coord);
 348
 349    /* isOdd = flr & 1 */
 350    isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, "");
 351
 352    /* make coord positive or negative depending on isOdd */
 353    coord = lp_build_set_sign(coord_bld, fract, isOdd);
 354
 355    /* convert isOdd to float */
 356    isOdd = lp_build_int_to_float(coord_bld, isOdd);
 357
 358    /* add isOdd to coord */
 359    coord = lp_build_add(coord_bld, coord, isOdd);
 360
 361    return coord;
 362 }
 363
 364
 365 /**
 366  * We only support a few wrap modes in lp_build_sample_wrap_int() at this time.
 367  * Return whether the given mode is supported by that function.
 368  */
 369 static boolean
 370 is_simple_wrap_mode(unsigned mode)
 371 {
 372    switch (mode) {
 373    case PIPE_TEX_WRAP_REPEAT:
 374    case PIPE_TEX_WRAP_CLAMP:
 375    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 376       return TRUE;
 377    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 378    default:
 379       return FALSE;
 380    }
 381 }
 382
 383
 384 /**
 385  * Build LLVM code for texture wrap mode, for scaled integer texcoords.
 386  * \param coord  the incoming texcoord (s,t,r or q) scaled to the texture size
 387  * \param length  the texture size along one dimension
 388  * \param is_pot  if TRUE, length is a power of two
 389  * \param wrap_mode  one of PIPE_TEX_WRAP_x
 390  */
 391 static LLVMValueRef
 392 lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
 393                          LLVMValueRef coord,
 394                          LLVMValueRef length,
 395                          boolean is_pot,
 396                          unsigned wrap_mode)
 397 {
 398    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 399    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 400    LLVMValueRef length_minus_one;
 401
 402    length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
 403
 404    switch(wrap_mode) {
 405    case PIPE_TEX_WRAP_REPEAT:
 406       if(is_pot)
 407          coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
 408       else
 409          /* Signed remainder won't give the right results for negative
 410           * dividends but unsigned remainder does.*/
 411          coord = LLVMBuildURem(bld->builder, coord, length, "");
 412       break;
 413
 414    case PIPE_TEX_WRAP_CLAMP:
 415    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 416    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 417       coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
 418       coord = lp_build_min(int_coord_bld, coord, length_minus_one);
 419       break;
 420
 421    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 422    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 423    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 424    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 425       /* FIXME */
 426       _debug_printf("llvmpipe: failed to translate texture wrap mode %s\n",
 427                     util_dump_tex_wrap(wrap_mode, TRUE));
 428       coord = lp_build_max(uint_coord_bld, coord, uint_coord_bld->zero);
 429       coord = lp_build_min(uint_coord_bld, coord, length_minus_one);
 430       break;
 431
 432    default:
 433       assert(0);
 434    }
 435
 436    return coord;
 437 }
 438
 439
 440 /**
 441  * Build LLVM code for texture wrap mode for linear filtering.
 442  * \param x0_out  returns first integer texcoord
 443  * \param x1_out  returns second integer texcoord
 444  * \param weight_out  returns linear interpolation weight
 445  */
 446 static void
 447 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
 448                             LLVMValueRef coord,
 449                             LLVMValueRef length,
 450                             boolean is_pot,
 451                             unsigned wrap_mode,
 452                             LLVMValueRef *x0_out,
 453                             LLVMValueRef *x1_out,
 454                             LLVMValueRef *weight_out)
 455 {
 456    struct lp_build_context *coord_bld = &bld->coord_bld;
 457    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 458    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 459    LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0);
 460    LLVMValueRef half = lp_build_const_scalar(coord_bld->type, 0.5);
 461    LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
 462    LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
 463    LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
 464    LLVMValueRef coord0, coord1, weight;
 465
 466    switch(wrap_mode) {
 467    case PIPE_TEX_WRAP_REPEAT:
 468       /* mul by size and subtract 0.5 */
 469       coord = lp_build_mul(coord_bld, coord, length_f);
 470       coord = lp_build_sub(coord_bld, coord, half);
 471       /* convert to int */
 472       coord0 = lp_build_ifloor(coord_bld, coord);
 473       coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one);
 474       /* compute lerp weight */
 475       weight = lp_build_fract(coord_bld, coord);
 476       /* repeat wrap */
 477       if (is_pot) {
 478          coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
 479          coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, "");
 480       }
 481       else {
 482          /* Signed remainder won't give the right results for negative
 483           * dividends but unsigned remainder does.*/
 484          coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
 485          coord1 = LLVMBuildURem(bld->builder, coord1, length, "");
 486       }
 487       break;
 488
 489    case PIPE_TEX_WRAP_CLAMP:
 490       if (bld->static_state->normalized_coords) {
 491          coord = lp_build_mul(coord_bld, coord, length_f);
 492       }
 493       weight = lp_build_fract(coord_bld, coord);
 494       coord0 = lp_build_clamp(coord_bld, coord, coord_bld->zero,
 495                               length_f_minus_one);
 496       coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
 497       coord1 = lp_build_clamp(coord_bld, coord1, coord_bld->zero,
 498                               length_f_minus_one);
 499       coord0 = lp_build_ifloor(coord_bld, coord0);
 500       coord1 = lp_build_ifloor(coord_bld, coord1);
 501       break;
 502
 503    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 504       if (bld->static_state->normalized_coords) {
 505          /* clamp to [0,1] */
 506          coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one);
 507          /* mul by tex size and subtract 0.5 */
 508          coord = lp_build_mul(coord_bld, coord, length_f);
 509          coord = lp_build_sub(coord_bld, coord, half);
 510       }
 511       else {
 512          LLVMValueRef min, max;
 513          /* clamp to [0.5, length - 0.5] */
 514          min = lp_build_const_scalar(coord_bld->type, 0.5F);
 515          max = lp_build_sub(coord_bld, length_f, min);
 516          coord = lp_build_clamp(coord_bld, coord, min, max);
 517       }
 518       /* compute lerp weight */
 519       weight = lp_build_fract(coord_bld, coord);
 520       /* coord0 = floor(coord); */
 521       coord0 = lp_build_ifloor(coord_bld, coord);
 522       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 523       /* coord0 = max(coord0, 0) */
 524       coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
 525       /* coord1 = min(coord1, length-1) */
 526       coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
 527       break;
 528
 529    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 530       {
 531          LLVMValueRef min, max;
 532          if (bld->static_state->normalized_coords) {
 533             /* min = -1.0 / (2 * length) = -0.5 / length */
 534             min = lp_build_mul(coord_bld,
 535                                lp_build_const_scalar(coord_bld->type, -0.5F),
 536                                lp_build_rcp(coord_bld, length_f));
 537             /* max = 1.0 - min */
 538             max = lp_build_sub(coord_bld, coord_bld->one, min);
 539             /* coord = clamp(coord, min, max) */
 540             coord = lp_build_clamp(coord_bld, coord, min, max);
 541             /* scale coord to length (and sub 0.5?) */
 542             coord = lp_build_mul(coord_bld, coord, length_f);
 543             coord = lp_build_sub(coord_bld, coord, half);
 544          }
 545          else {
 546             /* clamp to [-0.5, length + 0.5] */
 547             min = lp_build_const_scalar(coord_bld->type, -0.5F);
 548             max = lp_build_sub(coord_bld, length_f, min);
 549             coord = lp_build_clamp(coord_bld, coord, min, max);
 550             coord = lp_build_sub(coord_bld, coord, half);
 551          }
 552          /* compute lerp weight */
 553          weight = lp_build_fract(coord_bld, coord);
 554          /* convert to int */
 555          coord0 = lp_build_ifloor(coord_bld, coord);
 556          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 557       }
 558       break;
 559
 560    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 561       /* compute mirror function */
 562       coord = lp_build_coord_mirror(bld, coord);
 563
 564       /* scale coord to length */
 565       coord = lp_build_mul(coord_bld, coord, length_f);
 566       coord = lp_build_sub(coord_bld, coord, half);
 567
 568       /* compute lerp weight */
 569       weight = lp_build_fract(coord_bld, coord);
 570
 571       /* convert to int coords */
 572       coord0 = lp_build_ifloor(coord_bld, coord);
 573       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 574
 575       /* coord0 = max(coord0, 0) */
 576       coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
 577       /* coord1 = min(coord1, length-1) */
 578       coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
 579       break;
 580
 581    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 582       {
 583          LLVMValueRef min, max;
 584          /* min = 1.0 / (2 * length) */
 585          min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
 586          /* max = 1.0 - min */
 587          max = lp_build_sub(coord_bld, coord_bld->one, min);
 588
 589          coord = lp_build_abs(coord_bld, coord);
 590          coord = lp_build_clamp(coord_bld, coord, min, max);
 591          coord = lp_build_mul(coord_bld, coord, length_f);
 592          if(0)coord = lp_build_sub(coord_bld, coord, half);
 593          weight = lp_build_fract(coord_bld, coord);
 594          coord0 = lp_build_ifloor(coord_bld, coord);
 595          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 596       }
 597       break;
 598
 599    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 600       {
 601          LLVMValueRef min, max;
 602          /* min = 1.0 / (2 * length) */
 603          min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
 604          /* max = 1.0 - min */
 605          max = lp_build_sub(coord_bld, coord_bld->one, min);
 606
 607          coord = lp_build_abs(coord_bld, coord);
 608          coord = lp_build_clamp(coord_bld, coord, min, max);
 609          coord = lp_build_mul(coord_bld, coord, length_f);
 610          coord = lp_build_sub(coord_bld, coord, half);
 611          weight = lp_build_fract(coord_bld, coord);
 612          coord0 = lp_build_ifloor(coord_bld, coord);
 613          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 614       }
 615       break;
 616
 617    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 618       {
 619          LLVMValueRef min, max;
 620          /* min = -1.0 / (2 * length) = -0.5 / length */
 621          min = lp_build_mul(coord_bld,
 622                             lp_build_const_scalar(coord_bld->type, -0.5F),
 623                             lp_build_rcp(coord_bld, length_f));
 624          /* max = 1.0 - min */
 625          max = lp_build_sub(coord_bld, coord_bld->one, min);
 626
 627          coord = lp_build_abs(coord_bld, coord);
 628          coord = lp_build_clamp(coord_bld, coord, min, max);
 629          coord = lp_build_mul(coord_bld, coord, length_f);
 630          coord = lp_build_sub(coord_bld, coord, half);
 631          weight = lp_build_fract(coord_bld, coord);
 632          coord0 = lp_build_ifloor(coord_bld, coord);
 633          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 634       }
 635       break;
 636
 637    default:
 638       assert(0);
 639       coord0 = NULL;
 640       coord1 = NULL;
 641       weight = NULL;
 642    }
 643
 644    *x0_out = coord0;
 645    *x1_out = coord1;
 646    *weight_out = weight;
 647 }
 648
 649
 650 /**
 651  * Build LLVM code for texture wrap mode for nearest filtering.
 652  * \param coord  the incoming texcoord (nominally in [0,1])
 653  * \param length  the texture size along one dimension, as int
 654  * \param is_pot  if TRUE, length is a power of two
 655  * \param wrap_mode  one of PIPE_TEX_WRAP_x
 656  */
 657 static LLVMValueRef
 658 lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
 659                              LLVMValueRef coord,
 660                              LLVMValueRef length,
 661                              boolean is_pot,
 662                              unsigned wrap_mode)
 663 {
 664    struct lp_build_context *coord_bld = &bld->coord_bld;
 665    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 666    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 667    LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0);
 668    LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
 669    LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
 670    LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
 671    LLVMValueRef icoord;
 672
 673    switch(wrap_mode) {
 674    case PIPE_TEX_WRAP_REPEAT:
 675       coord = lp_build_mul(coord_bld, coord, length_f);
 676       icoord = lp_build_ifloor(coord_bld, coord);
 677       if (is_pot)
 678          icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, "");
 679       else
 680          /* Signed remainder won't give the right results for negative
 681           * dividends but unsigned remainder does.*/
 682          icoord = LLVMBuildURem(bld->builder, icoord, length, "");
 683       break;
 684
 685    case PIPE_TEX_WRAP_CLAMP:
 686       /* mul by size */
 687       if (bld->static_state->normalized_coords) {
 688          coord = lp_build_mul(coord_bld, coord, length_f);
 689       }
 690       /* floor */
 691       icoord = lp_build_ifloor(coord_bld, coord);
 692       /* clamp to [0, size-1].  Note: int coord builder type */
 693       icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
 694                               length_minus_one);
 695       break;
 696
 697    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 698       {
 699          LLVMValueRef min, max;
 700          if (bld->static_state->normalized_coords) {
 701             /* min = 1.0 / (2 * length) */
 702             min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
 703             /* max = length - min */
 704             max = lp_build_sub(coord_bld, length_f, min);
 705             /* scale coord to length */
 706             coord = lp_build_mul(coord_bld, coord, length_f);
 707          }
 708          else {
 709             /* clamp to [0.5, length - 0.5] */
 710             min = lp_build_const_scalar(coord_bld->type, 0.5F);
 711             max = lp_build_sub(coord_bld, length_f, min);
 712          }
 713          /* coord = clamp(coord, min, max) */
 714          coord = lp_build_clamp(coord_bld, coord, min, max);
 715          icoord = lp_build_ifloor(coord_bld, coord);
 716       }
 717       break;
 718
 719    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 720       /* Note: this is the same as CLAMP_TO_EDGE, except min = -min */
 721       {
 722          LLVMValueRef min, max;
 723          if (bld->static_state->normalized_coords) {
 724             /* min = -1.0 / (2 * length) = -0.5 / length */
 725             min = lp_build_mul(coord_bld,
 726                                lp_build_const_scalar(coord_bld->type, -0.5F),
 727                                lp_build_rcp(coord_bld, length_f));
 728             /* max = length - min */
 729             max = lp_build_sub(coord_bld, length_f, min);
 730             /* scale coord to length */
 731             coord = lp_build_mul(coord_bld, coord, length_f);
 732          }
 733          else {
 734             /* clamp to [-0.5, length + 0.5] */
 735             min = lp_build_const_scalar(coord_bld->type, -0.5F);
 736             max = lp_build_sub(coord_bld, length_f, min);
 737          }
 738          /* coord = clamp(coord, min, max) */
 739          coord = lp_build_clamp(coord_bld, coord, min, max);
 740          icoord = lp_build_ifloor(coord_bld, coord);
 741       }
 742       break;
 743
 744    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 745       {
 746          LLVMValueRef min, max;
 747          /* min = 1.0 / (2 * length) */
 748          min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
 749          /* max = length - min */
 750          max = lp_build_sub(coord_bld, length_f, min);
 751
 752          /* compute mirror function */
 753          coord = lp_build_coord_mirror(bld, coord);
 754
 755          /* scale coord to length */
 756          coord = lp_build_mul(coord_bld, coord, length_f);
 757
 758          /* coord = clamp(coord, min, max) */
 759          coord = lp_build_clamp(coord_bld, coord, min, max);
 760          icoord = lp_build_ifloor(coord_bld, coord);
 761       }
 762       break;
 763
 764    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 765       coord = lp_build_abs(coord_bld, coord);
 766       coord = lp_build_mul(coord_bld, coord, length_f);
 767       coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f_minus_one);
 768       icoord = lp_build_ifloor(coord_bld, coord);
 769       break;
 770
 771    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 772       {
 773          LLVMValueRef min, max;
 774          /* min = 1.0 / (2 * length) */
 775          min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
 776          /* max = length - min */
 777          max = lp_build_sub(coord_bld, length_f, min);
 778
 779          coord = lp_build_abs(coord_bld, coord);
 780          coord = lp_build_mul(coord_bld, coord, length_f);
 781          coord = lp_build_clamp(coord_bld, coord, min, max);
 782          icoord = lp_build_ifloor(coord_bld, coord);
 783       }
 784       break;
 785
 786    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 787       {
 788          LLVMValueRef min, max;
 789          /* min = 1.0 / (2 * length) */
 790          min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
 791          min = lp_build_negate(coord_bld, min);
 792          /* max = length - min */
 793          max = lp_build_sub(coord_bld, length_f, min);
 794
 795          coord = lp_build_abs(coord_bld, coord);
 796          coord = lp_build_mul(coord_bld, coord, length_f);
 797          coord = lp_build_clamp(coord_bld, coord, min, max);
 798          icoord = lp_build_ifloor(coord_bld, coord);
 799       }
 800       break;
 801
 802    default:
 803       assert(0);
 804       icoord = NULL;
 805    }
 806
 807    return icoord;
 808 }
 809
 810
 811 /**
 812  * Codegen equivalent for u_minify().
 813  * Return max(1, base_size >> level);
 814  */
 815 static LLVMValueRef
 816 lp_build_minify(struct lp_build_sample_context *bld,
 817                 LLVMValueRef base_size,
 818                 LLVMValueRef level)
 819 {
 820    LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify");
 821    size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
 822    return size;
 823 }
 824
 825
 826 /**
 827  * Generate code to compute texture level of detail (lambda).
 828  * \param s  vector of texcoord s values
 829  * \param t  vector of texcoord t values
 830  * \param r  vector of texcoord r values
 831  * \param width  scalar int texture width
 832  * \param height  scalar int texture height
 833  * \param depth  scalar int texture depth
 834  */
 835 static LLVMValueRef
 836 lp_build_lod_selector(struct lp_build_sample_context *bld,
 837                       LLVMValueRef s,
 838                       LLVMValueRef t,
 839                       LLVMValueRef r,
 840                       LLVMValueRef width,
 841                       LLVMValueRef height,
 842                       LLVMValueRef depth)
 843
 844 {
 845    const int dims = texture_dims(bld->static_state->target);
 846    struct lp_build_context *float_bld = &bld->float_bld;
 847    LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(), bld->static_state->lod_bias);
 848    LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
 849    LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->max_lod);
 850
 851    LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
 852    LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
 853    LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
 854
 855    LLVMValueRef s0, s1, s2;
 856    LLVMValueRef t0, t1, t2;
 857    LLVMValueRef r0, r1, r2;
 858    LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
 859    LLVMValueRef rho, lod;
 860
 861    /*
 862     * dsdx = abs(s[1] - s[0]);
 863     * dsdy = abs(s[2] - s[0]);
 864     * dtdx = abs(t[1] - t[0]);
 865     * dtdy = abs(t[2] - t[0]);
 866     * drdx = abs(r[1] - r[0]);
 867     * drdy = abs(r[2] - r[0]);
 868     * XXX we're assuming a four-element quad in 2x2 layout here.
 869     */
 870    s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
 871    s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
 872    s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
 873    dsdx = LLVMBuildSub(bld->builder, s1, s0, "");
 874    dsdx = lp_build_abs(float_bld, dsdx);
 875    dsdy = LLVMBuildSub(bld->builder, s2, s0, "");
 876    dsdy = lp_build_abs(float_bld, dsdy);
 877    if (dims > 1) {
 878       t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
 879       t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
 880       t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
 881       dtdx = LLVMBuildSub(bld->builder, t1, t0, "");
 882       dtdx = lp_build_abs(float_bld, dtdx);
 883       dtdy = LLVMBuildSub(bld->builder, t2, t0, "");
 884       dtdy = lp_build_abs(float_bld, dtdy);
 885       if (dims > 2) {
 886          r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
 887          r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
 888          r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
 889          drdx = LLVMBuildSub(bld->builder, r1, r0, "");
 890          drdx = lp_build_abs(float_bld, drdx);
 891          drdy = LLVMBuildSub(bld->builder, r2, r0, "");
 892          drdy = lp_build_abs(float_bld, drdy);
 893       }
 894    }
 895
 896    /* Compute rho = max of all partial derivatives scaled by texture size.
 897     * XXX this could be vectorized somewhat
 898     */
 899    rho = LLVMBuildMul(bld->builder,
 900                       lp_build_max(float_bld, dsdx, dsdy),
 901                       lp_build_int_to_float(float_bld, width), "");
 902    if (dims > 1) {
 903       LLVMValueRef max;
 904       max = LLVMBuildMul(bld->builder,
 905                          lp_build_max(float_bld, dtdx, dtdy),
 906                          lp_build_int_to_float(float_bld, height), "");
 907       rho = lp_build_max(float_bld, rho, max);
 908       if (dims > 2) {
 909          max = LLVMBuildMul(bld->builder,
 910                             lp_build_max(float_bld, drdx, drdy),
 911                             lp_build_int_to_float(float_bld, depth), "");
 912          rho = lp_build_max(float_bld, rho, max);
 913       }
 914    }
 915
 916    /* compute lod = log2(rho) */
 917    lod = lp_build_log2(float_bld, rho);
 918
 919    /* add lod bias */
 920    lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias");
 921
 922    /* clamp lod */
 923    lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
 924
 925    return lod;
 926 }
 927
 928
 929 /**
 930  * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
 931  * mipmap level index.
 932  * Note: this is all scalar code.
 933  * \param lod  scalar float texture level of detail
 934  * \param level_out  returns integer
 935  */
 936 static void
 937 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
 938                            unsigned unit,
 939                            LLVMValueRef lod,
 940                            LLVMValueRef *level_out)
 941 {
 942    struct lp_build_context *float_bld = &bld->float_bld;
 943    struct lp_build_context *int_bld = &bld->int_bld;
 944    LLVMValueRef last_level, level;
 945
 946    LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
 947
 948    last_level = bld->dynamic_state->last_level(bld->dynamic_state,
 949                                                bld->builder, unit);
 950
 951    /* convert float lod to integer */
 952    level = lp_build_iround(float_bld, lod);
 953
 954    /* clamp level to legal range of levels */
 955    *level_out = lp_build_clamp(int_bld, level, zero, last_level);
 956 }
 957
 958
 959 /**
 960  * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
 961  * two (adjacent) mipmap level indexes.  Later, we'll sample from those
 962  * two mipmap levels and interpolate between them.
 963  */
 964 static void
 965 lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
 966                            unsigned unit,
 967                            LLVMValueRef lod,
 968                            LLVMValueRef *level0_out,
 969                            LLVMValueRef *level1_out,
 970                            LLVMValueRef *weight_out)
 971 {
 972    struct lp_build_context *float_bld = &bld->float_bld;
 973    struct lp_build_context *int_bld = &bld->int_bld;
 974    LLVMValueRef last_level, level;
 975
 976    last_level = bld->dynamic_state->last_level(bld->dynamic_state,
 977                                                bld->builder, unit);
 978
 979    /* convert float lod to integer */
 980    level = lp_build_ifloor(float_bld, lod);
 981
 982    /* compute level 0 and clamp to legal range of levels */
 983    *level0_out = lp_build_clamp(int_bld, level,
 984                                 int_bld->zero,
 985                                 last_level);
 986    /* compute level 1 and clamp to legal range of levels */
 987    *level1_out = lp_build_add(int_bld, *level0_out, int_bld->one);
 988    *level1_out = lp_build_min(int_bld, *level1_out, int_bld->zero);
 989
 990    *weight_out = lp_build_fract(float_bld, lod);
 991 }
 992
 993
 994 /**
 995  * Generate code to sample a mipmap level with nearest filtering.
 996  */
 997 static void
 998 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
 999                               LLVMValueRef width_vec,
1000                               LLVMValueRef height_vec,
1001                               LLVMValueRef depth_vec,
1002                               LLVMValueRef row_stride_vec,
1003                               LLVMValueRef img_stride_vec,
1004                               LLVMValueRef data_ptr,
1005                               LLVMValueRef s,
1006                               LLVMValueRef t,
1007                               LLVMValueRef r,
1008                               LLVMValueRef colors_out[4])
1009 {
1010    const int dims = texture_dims(bld->static_state->target);
1011    LLVMValueRef x, y, z;
1012
1013    /*
1014     * Compute integer texcoords.
1015     */
1016    x = lp_build_sample_wrap_nearest(bld, s, width_vec,
1017                                     bld->static_state->pot_width,
1018                                     bld->static_state->wrap_s);
1019    lp_build_name(x, "tex.x.wrapped");
1020
1021    if (dims >= 2) {
1022       y = lp_build_sample_wrap_nearest(bld, t, height_vec,
1023                                        bld->static_state->pot_height,
1024                                        bld->static_state->wrap_t);
1025       lp_build_name(y, "tex.y.wrapped");
1026
1027       if (dims == 3) {
1028          z = lp_build_sample_wrap_nearest(bld, r, depth_vec,
1029                                           bld->static_state->pot_height,
1030                                           bld->static_state->wrap_r);
1031          lp_build_name(z, "tex.z.wrapped");
1032       }
1033       else {
1034          z = NULL;
1035       }
1036    }
1037    else {
1038       y = z = NULL;
1039    }
1040
1041    /*
1042     * Get texture colors.
1043     */
1044    lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1045                              x, y, z,
1046                              row_stride_vec, img_stride_vec,
1047                              data_ptr, colors_out);
1048 }
1049
1050
1051 /**
1052  * Generate code to sample a mipmap level with linear filtering.
1053  * 1D, 2D and 3D images are suppored.
1054  */
1055 static void
1056 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
1057                              LLVMValueRef width_vec,
1058                              LLVMValueRef height_vec,
1059                              LLVMValueRef depth_vec,
1060                              LLVMValueRef row_stride_vec,
1061                              LLVMValueRef img_stride_vec,
1062                              LLVMValueRef data_ptr,
1063                              LLVMValueRef s,
1064                              LLVMValueRef t,
1065                              LLVMValueRef r,
1066                              LLVMValueRef colors_out[4])
1067 {
1068    const int dims = texture_dims(bld->static_state->target);
1069    LLVMValueRef x0, y0, z0, x1, y1, z1;
1070    LLVMValueRef s_fpart, t_fpart, r_fpart;
1071    LLVMValueRef neighbors[2][2][4];
1072    int chan;
1073
1074    /*
1075     * Compute integer texcoords.
1076     */
1077    lp_build_sample_wrap_linear(bld, s, width_vec,
1078                                bld->static_state->pot_width,
1079                                bld->static_state->wrap_s,
1080                                &x0, &x1, &s_fpart);
1081    lp_build_name(x0, "tex.x0.wrapped");
1082    lp_build_name(x1, "tex.x1.wrapped");
1083
1084    if (dims >= 2) {
1085       lp_build_sample_wrap_linear(bld, t, height_vec,
1086                                   bld->static_state->pot_height,
1087                                   bld->static_state->wrap_t,
1088                                   &y0, &y1, &t_fpart);
1089       lp_build_name(y0, "tex.y0.wrapped");
1090       lp_build_name(y1, "tex.y1.wrapped");
1091
1092       if (dims == 3) {
1093          lp_build_sample_wrap_linear(bld, r, depth_vec,
1094                                      bld->static_state->pot_depth,
1095                                      bld->static_state->wrap_r,
1096                                      &z0, &z1, &r_fpart);
1097          lp_build_name(z0, "tex.z0.wrapped");
1098          lp_build_name(z1, "tex.z1.wrapped");
1099       }
1100       else {
1101          z0 = z1 = r_fpart = NULL;
1102       }
1103    }
1104    else {
1105       y0 = y1 = t_fpart = NULL;
1106       z0 = z1 = r_fpart = NULL;
1107    }
1108
1109    /*
1110     * Get texture colors.
1111     */
1112    /* get x0/x1 texels */
1113    lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1114                              x0, y0, z0,
1115                              row_stride_vec, img_stride_vec,
1116                              data_ptr, neighbors[0][0]);
1117    lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1118                              x1, y0, z0,
1119                              row_stride_vec, img_stride_vec,
1120                              data_ptr, neighbors[0][1]);
1121
1122    if (dims == 1) {
1123       /* Interpolate two samples from 1D image to produce one color */
1124       for (chan = 0; chan < 4; chan++) {
1125          colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
1126                                           neighbors[0][0][chan],
1127                                           neighbors[0][1][chan]);
1128       }
1129    }
1130    else {
1131       /* 2D/3D texture */
1132       LLVMValueRef colors0[4];
1133
1134       /* get x0/x1 texels at y1 */
1135       lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1136                                 x0, y1, z0,
1137                                 row_stride_vec, img_stride_vec,
1138                                 data_ptr, neighbors[1][0]);
1139       lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1140                                 x1, y1, z0,
1141                                 row_stride_vec, img_stride_vec,
1142                                 data_ptr, neighbors[1][1]);
1143
1144       /* Bilinear interpolate the four samples from the 2D image / 3D slice */
1145       for (chan = 0; chan < 4; chan++) {
1146          colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
1147                                           s_fpart, t_fpart,
1148                                           neighbors[0][0][chan],
1149                                           neighbors[0][1][chan],
1150                                           neighbors[1][0][chan],
1151                                           neighbors[1][1][chan]);
1152       }
1153
1154       if (dims == 3) {
1155          LLVMValueRef neighbors1[2][2][4];
1156          LLVMValueRef colors1[4];
1157
1158          /* get x0/x1/y0/y1 texels at z1 */
1159          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1160                                    x0, y0, z1,
1161                                    row_stride_vec, img_stride_vec,
1162                                    data_ptr, neighbors1[0][0]);
1163          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1164                                    x1, y0, z1,
1165                                    row_stride_vec, img_stride_vec,
1166                                    data_ptr, neighbors1[0][1]);
1167          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1168                                    x0, y1, z1,
1169                                    row_stride_vec, img_stride_vec,
1170                                    data_ptr, neighbors1[1][0]);
1171          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1172                                    x1, y1, z1,
1173                                    row_stride_vec, img_stride_vec,
1174                                    data_ptr, neighbors1[1][1]);
1175
1176          /* Bilinear interpolate the four samples from the second Z slice */
1177          for (chan = 0; chan < 4; chan++) {
1178             colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
1179                                              s_fpart, t_fpart,
1180                                              neighbors1[0][0][chan],
1181                                              neighbors1[0][1][chan],
1182                                              neighbors1[1][0][chan],
1183                                              neighbors1[1][1][chan]);
1184          }
1185
1186          /* Linearly interpolate the two samples from the two 3D slices */
1187          for (chan = 0; chan < 4; chan++) {
1188             colors_out[chan] = lp_build_lerp(&bld->texel_bld,
1189                                              r_fpart,
1190                                              colors0[chan], colors1[chan]);
1191          }
1192       }
1193       else {
1194          /* 2D tex */
1195          for (chan = 0; chan < 4; chan++) {
1196             colors_out[chan] = colors0[chan];
1197          }
1198       }
1199    }
1200 }
1201
1202
1203
1204 /**
1205  * General texture sampling codegen.
1206  * This function handles texture sampling for all texture targets (1D,
1207  * 2D, 3D, cube) and all filtering modes.
1208  */
1209 static void
1210 lp_build_sample_general(struct lp_build_sample_context *bld,
1211                         unsigned unit,
1212                         LLVMValueRef s,
1213                         LLVMValueRef t,
1214                         LLVMValueRef r,
1215                         LLVMValueRef width,
1216                         LLVMValueRef height,
1217                         LLVMValueRef depth,
1218                         LLVMValueRef width_vec,
1219                         LLVMValueRef height_vec,
1220                         LLVMValueRef depth_vec,
1221                         LLVMValueRef row_stride_array,
1222                         LLVMValueRef img_stride_vec,
1223                         LLVMValueRef data_array,
1224                         LLVMValueRef *colors_out)
1225 {
1226    const unsigned mip_filter = bld->static_state->min_mip_filter;
1227    const unsigned min_filter = bld->static_state->min_img_filter;
1228    const unsigned mag_filter = bld->static_state->mag_img_filter;
1229    const int dims = texture_dims(bld->static_state->target);
1230    LLVMValueRef lod, lod_fpart;
1231    LLVMValueRef ilevel0, ilevel1, ilevel0_vec, ilevel1_vec;
1232    LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
1233    LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
1234    LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
1235    LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
1236    LLVMValueRef data_ptr0, data_ptr1;
1237    int chan;
1238
1239    /*
1240    printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
1241           mip_filter, min_filter, mag_filter);
1242    */
1243
1244    /*
1245     * Compute the level of detail (mipmap level index(es)).
1246     */
1247    if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
1248       /* always use mip level 0 */
1249       ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
1250    }
1251    else {
1252       /* compute float LOD */
1253       lod = lp_build_lod_selector(bld, s, t, r, width, height, depth);
1254
1255       if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
1256          lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
1257       }
1258       else {
1259          assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR);
1260          lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
1261                                     &lod_fpart);
1262          lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart);
1263       }
1264    }
1265
1266    /*
1267     * Convert scalar integer mipmap levels into vectors.
1268     */
1269    ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
1270    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
1271       ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
1272
1273    /*
1274     * Compute width, height at mipmap level 'ilevel0'
1275     */
1276    width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
1277    if (dims >= 2) {
1278       height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
1279       row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
1280                                                       ilevel0);
1281       if (dims == 3) {
1282          depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
1283       }
1284    }
1285    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1286       /* compute width, height, depth for second mipmap level at ilevel1 */
1287       width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
1288       if (dims >= 2) {
1289          height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
1290          row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
1291                                                          ilevel1);
1292          if (dims == 3) {
1293             depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
1294          }
1295       }
1296    }
1297
1298    /*
1299     * Choose cube face, recompute texcoords.
1300     */
1301    if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1302
1303    }
1304
1305    /*
1306     * Get pointer(s) to image data for mipmap level(s).
1307     */
1308    data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
1309    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1310       data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
1311    }
1312
1313    /*
1314     * Get/interpolate texture colors.
1315     */
1316    /* XXX temporarily force this path: */
1317    if (1 /*min_filter == mag_filter*/) {
1318       /* same filter for minification or magnification */
1319       LLVMValueRef colors0[4], colors1[4];
1320
1321       if (min_filter == PIPE_TEX_FILTER_NEAREST) {
1322          lp_build_sample_image_nearest(bld,
1323                                        width0_vec, height0_vec, depth0_vec,
1324                                        row_stride0_vec, img_stride0_vec,
1325                                        data_ptr0, s, t, r, colors0);
1326
1327          if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1328             /* sample the second mipmap level, and interp */
1329             lp_build_sample_image_nearest(bld,
1330                                           width1_vec, height1_vec, depth1_vec,
1331                                           row_stride1_vec, img_stride1_vec,
1332                                           data_ptr1, s, t, r, colors1);
1333          }
1334       }
1335       else {
1336          assert(min_filter == PIPE_TEX_FILTER_LINEAR);
1337
1338          lp_build_sample_image_linear(bld,
1339                                       width0_vec, height0_vec, depth0_vec,
1340                                       row_stride0_vec, img_stride0_vec,
1341                                       data_ptr0, s, t, r, colors0);
1342
1343
1344          if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1345             /* sample the second mipmap level, and interp */
1346             lp_build_sample_image_linear(bld,
1347                                          width1_vec, height1_vec, depth1_vec,
1348                                          row_stride1_vec, img_stride1_vec,
1349                                          data_ptr1, s, t, r, colors1);
1350          }
1351       }
1352
1353       if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1354          /* interpolate samples from the two mipmap levels */
1355          for (chan = 0; chan < 4; chan++) {
1356             colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1357                                              colors0[chan], colors1[chan]);
1358          }
1359       }
1360       else {
1361          /* use first/only level's colors */
1362          for (chan = 0; chan < 4; chan++) {
1363             colors_out[chan] = colors0[chan];
1364          }
1365       }
1366    }
1367    else {
1368       /* emit conditional to choose min image filter or mag image filter
1369        * depending on the lod being >0 or <= 0, respectively.
1370        */
1371       abort();
1372    }
1373 }
1374
1375
1376
1377 static void
1378 lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
1379                           struct lp_type dst_type,
1380                           LLVMValueRef packed,
1381                           LLVMValueRef *rgba)
1382 {
1383    LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff);
1384    unsigned chan;
1385
1386    /* Decode the input vector components */
1387    for (chan = 0; chan < 4; ++chan) {
1388       unsigned start = chan*8;
1389       unsigned stop = start + 8;
1390       LLVMValueRef input;
1391
1392       input = packed;
1393
1394       if(start)
1395          input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), "");
1396
1397       if(stop < 32)
1398          input = LLVMBuildAnd(builder, input, mask, "");
1399
1400       input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
1401
1402       rgba[chan] = input;
1403    }
1404 }
1405
1406
1407 static void
1408 lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
1409                               LLVMValueRef s,
1410                               LLVMValueRef t,
1411                               LLVMValueRef width,
1412                               LLVMValueRef height,
1413                               LLVMValueRef stride_array,
1414                               LLVMValueRef data_array,
1415                               LLVMValueRef *texel)
1416 {
1417    LLVMBuilderRef builder = bld->builder;
1418    struct lp_build_context i32, h16, u8n;
1419    LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
1420    LLVMValueRef i32_c8, i32_c128, i32_c255;
1421    LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
1422    LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
1423    LLVMValueRef x0, x1;
1424    LLVMValueRef y0, y1;
1425    LLVMValueRef neighbors[2][2];
1426    LLVMValueRef neighbors_lo[2][2];
1427    LLVMValueRef neighbors_hi[2][2];
1428    LLVMValueRef packed, packed_lo, packed_hi;
1429    LLVMValueRef unswizzled[4];
1430    LLVMValueRef stride;
1431
1432    lp_build_context_init(&i32, builder, lp_type_int_vec(32));
1433    lp_build_context_init(&h16, builder, lp_type_ufixed(16));
1434    lp_build_context_init(&u8n, builder, lp_type_unorm(8));
1435
1436    i32_vec_type = lp_build_vec_type(i32.type);
1437    h16_vec_type = lp_build_vec_type(h16.type);
1438    u8n_vec_type = lp_build_vec_type(u8n.type);
1439
1440    if (bld->static_state->normalized_coords) {
1441       LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
1442       LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width, coord_vec_type, "");
1443       LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height, coord_vec_type, "");
1444       s = lp_build_mul(&bld->coord_bld, s, fp_width);
1445       t = lp_build_mul(&bld->coord_bld, t, fp_height);
1446    }
1447
1448    /* scale coords by 256 (8 fractional bits) */
1449    s = lp_build_mul_imm(&bld->coord_bld, s, 256);
1450    t = lp_build_mul_imm(&bld->coord_bld, t, 256);
1451
1452    /* convert float to int */
1453    s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
1454    t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
1455
1456    /* subtract 0.5 (add -128) */
1457    i32_c128 = lp_build_int_const_scalar(i32.type, -128);
1458    s = LLVMBuildAdd(builder, s, i32_c128, "");
1459    t = LLVMBuildAdd(builder, t, i32_c128, "");
1460
1461    /* compute floor (shift right 8) */
1462    i32_c8 = lp_build_int_const_scalar(i32.type, 8);
1463    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
1464    t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
1465
1466    /* compute fractional part (AND with 0xff) */
1467    i32_c255 = lp_build_int_const_scalar(i32.type, 255);
1468    s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
1469    t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
1470
1471    x0 = s_ipart;
1472    y0 = t_ipart;
1473
1474    x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
1475    y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
1476
1477    x0 = lp_build_sample_wrap_int(bld, x0, width,  bld->static_state->pot_width,
1478                                  bld->static_state->wrap_s);
1479    y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height,
1480                                  bld->static_state->wrap_t);
1481
1482    x1 = lp_build_sample_wrap_int(bld, x1, width,  bld->static_state->pot_width,
1483                                  bld->static_state->wrap_s);
1484    y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height,
1485                                  bld->static_state->wrap_t);
1486
1487    /*
1488     * Transform 4 x i32 in
1489     *
1490     *   s_fpart = {s0, s1, s2, s3}
1491     *
1492     * into 8 x i16
1493     *
1494     *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
1495     *
1496     * into two 8 x i16
1497     *
1498     *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
1499     *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
1500     *
1501     * and likewise for t_fpart. There is no risk of losing precision here
1502     * since the fractional parts only use the lower 8bits.
1503     */
1504
1505    s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
1506    t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
1507
1508    {
1509       LLVMTypeRef elem_type = LLVMInt32Type();
1510       LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
1511       LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
1512       LLVMValueRef shuffle_lo;
1513       LLVMValueRef shuffle_hi;
1514       unsigned i, j;
1515
1516       for(j = 0; j < h16.type.length; j += 4) {
1517          unsigned subindex = util_cpu_caps.little_endian ? 0 : 1;
1518          LLVMValueRef index;
1519
1520          index = LLVMConstInt(elem_type, j/2 + subindex, 0);
1521          for(i = 0; i < 4; ++i)
1522             shuffles_lo[j + i] = index;
1523
1524          index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
1525          for(i = 0; i < 4; ++i)
1526             shuffles_hi[j + i] = index;
1527       }
1528
1529       shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
1530       shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
1531
1532       s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, "");
1533       t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, "");
1534       s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, "");
1535       t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
1536    }
1537
1538    stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0);
1539
1540    /*
1541     * Fetch the pixels as 4 x 32bit (rgba order might differ):
1542     *
1543     *   rgba0 rgba1 rgba2 rgba3
1544     *
1545     * bit cast them into 16 x u8
1546     *
1547     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
1548     *
1549     * unpack them into two 8 x i16:
1550     *
1551     *   r0 g0 b0 a0 r1 g1 b1 a1
1552     *   r2 g2 b2 a2 r3 g3 b3 a3
1553     *
1554     * The higher 8 bits of the resulting elements will be zero.
1555     */
1556
1557    neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array);
1558    neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array);
1559    neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array);
1560    neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array);
1561
1562    neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
1563    neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
1564    neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
1565    neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
1566
1567    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
1568    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
1569    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
1570    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
1571
1572    /*
1573     * Linear interpolate with 8.8 fixed point.
1574     */
1575
1576    packed_lo = lp_build_lerp_2d(&h16,
1577                                 s_fpart_lo, t_fpart_lo,
1578                                 neighbors_lo[0][0],
1579                                 neighbors_lo[0][1],
1580                                 neighbors_lo[1][0],
1581                                 neighbors_lo[1][1]);
1582
1583    packed_hi = lp_build_lerp_2d(&h16,
1584                                 s_fpart_hi, t_fpart_hi,
1585                                 neighbors_hi[0][0],
1586                                 neighbors_hi[0][1],
1587                                 neighbors_hi[1][0],
1588                                 neighbors_hi[1][1]);
1589
1590    packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi);
1591
1592    /*
1593     * Convert to SoA and swizzle.
1594     */
1595
1596    packed = LLVMBuildBitCast(builder, packed, i32_vec_type, "");
1597
1598    lp_build_rgba8_to_f32_soa(bld->builder,
1599                              bld->texel_type,
1600                              packed, unswizzled);
1601
1602    lp_build_format_swizzle_soa(bld->format_desc,
1603                                bld->texel_type, unswizzled,
1604                                texel);
1605 }
1606
1607
1608 static void
1609 lp_build_sample_compare(struct lp_build_sample_context *bld,
1610                         LLVMValueRef p,
1611                         LLVMValueRef *texel)
1612 {
1613    struct lp_build_context *texel_bld = &bld->texel_bld;
1614    LLVMValueRef res;
1615    unsigned chan;
1616
1617    if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE)
1618       return;
1619
1620    /* TODO: Compare before swizzling, to avoid redundant computations */
1621    res = NULL;
1622    for(chan = 0; chan < 4; ++chan) {
1623       LLVMValueRef cmp;
1624       cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]);
1625       cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero);
1626
1627       if(res)
1628          res = lp_build_add(texel_bld, res, cmp);
1629       else
1630          res = cmp;
1631    }
1632
1633    assert(res);
1634    res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25));
1635
1636    /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
1637    for(chan = 0; chan < 3; ++chan)
1638       texel[chan] = res;
1639    texel[3] = texel_bld->one;
1640 }
1641
1642
1643 /**
1644  * Build texture sampling code.
1645  * 'texel' will return a vector of four LLVMValueRefs corresponding to
1646  * R, G, B, A.
1647  * \param type  vector float type to use for coords, etc.
1648  */
1649 void
1650 lp_build_sample_soa(LLVMBuilderRef builder,
1651                     const struct lp_sampler_static_state *static_state,
1652                     struct lp_sampler_dynamic_state *dynamic_state,
1653                     struct lp_type type,
1654                     unsigned unit,
1655                     unsigned num_coords,
1656                     const LLVMValueRef *coords,
1657                     LLVMValueRef lodbias,
1658                     LLVMValueRef *texel)
1659 {
1660    struct lp_build_sample_context bld;
1661    LLVMValueRef width, width_vec;
1662    LLVMValueRef height, height_vec;
1663    LLVMValueRef depth, depth_vec;
1664    LLVMValueRef stride_array;
1665    LLVMValueRef data_array;
1666    LLVMValueRef s;
1667    LLVMValueRef t;
1668    LLVMValueRef r;
1669
1670    (void) lp_build_lod_selector;   /* temporary to silence warning */
1671    (void) lp_build_nearest_mip_level;
1672    (void) lp_build_linear_mip_levels;
1673    (void) lp_build_minify;
1674
1675    /* Setup our build context */
1676    memset(&bld, 0, sizeof bld);
1677    bld.builder = builder;
1678    bld.static_state = static_state;
1679    bld.dynamic_state = dynamic_state;
1680    bld.format_desc = util_format_description(static_state->format);
1681
1682    bld.float_type = lp_type_float(32);
1683    bld.int_type = lp_type_int(32);
1684    bld.coord_type = type;
1685    bld.uint_coord_type = lp_uint_type(type);
1686    bld.int_coord_type = lp_int_type(type);
1687    bld.texel_type = type;
1688
1689    lp_build_context_init(&bld.float_bld, builder, bld.float_type);
1690    lp_build_context_init(&bld.int_bld, builder, bld.int_type);
1691    lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
1692    lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
1693    lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
1694    lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
1695
1696    /* Get the dynamic state */
1697    width = dynamic_state->width(dynamic_state, builder, unit);
1698    height = dynamic_state->height(dynamic_state, builder, unit);
1699    depth = dynamic_state->depth(dynamic_state, builder, unit);
1700    stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
1701    data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
1702    /* Note that data_array is an array[level] of pointers to texture images */
1703
1704    s = coords[0];
1705    t = coords[1];
1706    r = coords[2];
1707
1708    width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
1709    height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
1710    depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
1711
1712    if (lp_format_is_rgba8(bld.format_desc) &&
1713        static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
1714        static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
1715        static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
1716        is_simple_wrap_mode(static_state->wrap_s) &&
1717        is_simple_wrap_mode(static_state->wrap_t)) {
1718       /* special case */
1719       lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
1720                                     stride_array, data_array, texel);
1721    }
1722    else {
1723       lp_build_sample_general(&bld, unit, s, t, r,
1724                               width, height, depth,
1725                               width_vec, height_vec, depth_vec,
1726                               stride_array, NULL, data_array,
1727                               texel);
1728    }
1729
1730    lp_build_sample_compare(&bld, r, texel);
1731 }