src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * Texture sampling -- SoA.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  */
  34
  35 #include "pipe/p_defines.h"
  36 #include "pipe/p_state.h"
  37 #include "util/u_debug.h"
  38 #include "util/u_dump.h"
  39 #include "util/u_memory.h"
  40 #include "util/u_math.h"
  41 #include "util/u_format.h"
  42 #include "util/u_cpu_detect.h"
  43 #include "lp_bld_debug.h"
  44 #include "lp_bld_type.h"
  45 #include "lp_bld_const.h"
  46 #include "lp_bld_conv.h"
  47 #include "lp_bld_arit.h"
  48 #include "lp_bld_logic.h"
  49 #include "lp_bld_swizzle.h"
  50 #include "lp_bld_pack.h"
  51 #include "lp_bld_flow.h"
  52 #include "lp_bld_format.h"
  53 #include "lp_bld_sample.h"
  54
  55
  56 /**
  57  * Keep all information for sampling code generation in a single place.
  58  */
  59 struct lp_build_sample_context
  60 {
  61    LLVMBuilderRef builder;
  62
  63    const struct lp_sampler_static_state *static_state;
  64
  65    struct lp_sampler_dynamic_state *dynamic_state;
  66
  67    const struct util_format_description *format_desc;
  68
  69    /** regular scalar float type */
  70    struct lp_type float_type;
  71    struct lp_build_context float_bld;
  72
  73    /** regular scalar float type */
  74    struct lp_type int_type;
  75    struct lp_build_context int_bld;
  76
  77    /** Incoming coordinates type and build context */
  78    struct lp_type coord_type;
  79    struct lp_build_context coord_bld;
  80
  81    /** Unsigned integer coordinates */
  82    struct lp_type uint_coord_type;
  83    struct lp_build_context uint_coord_bld;
  84
  85    /** Signed integer coordinates */
  86    struct lp_type int_coord_type;
  87    struct lp_build_context int_coord_bld;
  88
  89    /** Output texels type and build context */
  90    struct lp_type texel_type;
  91    struct lp_build_context texel_bld;
  92 };
  93
  94
  95 /**
  96  * Does the given texture wrap mode allow sampling the texture border color?
  97  * XXX maybe move this into gallium util code.
  98  */
  99 static boolean
 100 wrap_mode_uses_border_color(unsigned mode)
 101 {
 102    switch (mode) {
 103    case PIPE_TEX_WRAP_REPEAT:
 104    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 105    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 106    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 107       return FALSE;
 108    case PIPE_TEX_WRAP_CLAMP:
 109    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 110    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 111    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 112       return TRUE;
 113    default:
 114       assert(0 && "unexpected wrap mode");
 115       return FALSE;
 116    }
 117 }
 118
 119
 120 static LLVMValueRef
 121 lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
 122                           LLVMValueRef data_array, LLVMValueRef level)
 123 {
 124    LLVMValueRef indexes[2], data_ptr;
 125    indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
 126    indexes[1] = level;
 127    data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
 128    data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
 129    return data_ptr;
 130 }
 131
 132
 133 static LLVMValueRef
 134 lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
 135                                 LLVMValueRef data_array, int level)
 136 {
 137    LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
 138    return lp_build_get_mipmap_level(bld, data_array, lvl);
 139 }
 140
 141
 142 /**
 143  * Dereference stride_array[mipmap_level] array to get a stride.
 144  * Return stride as a vector.
 145  */
 146 static LLVMValueRef
 147 lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
 148                               LLVMValueRef stride_array, LLVMValueRef level)
 149 {
 150    LLVMValueRef indexes[2], stride;
 151    indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
 152    indexes[1] = level;
 153    stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, "");
 154    stride = LLVMBuildLoad(bld->builder, stride, "");
 155    stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
 156    return stride;
 157 }
 158
 159
 160 /** Dereference stride_array[0] array to get a stride (as vector). */
 161 static LLVMValueRef
 162 lp_build_get_const_level_stride_vec(struct lp_build_sample_context *bld,
 163                                     LLVMValueRef stride_array, int level)
 164 {
 165    LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
 166    return lp_build_get_level_stride_vec(bld, stride_array, lvl);
 167 }
 168
 169
 170 static int
 171 texture_dims(enum pipe_texture_target tex)
 172 {
 173    switch (tex) {
 174    case PIPE_TEXTURE_1D:
 175       return 1;
 176    case PIPE_TEXTURE_2D:
 177    case PIPE_TEXTURE_CUBE:
 178       return 2;
 179    case PIPE_TEXTURE_3D:
 180       return 3;
 181    default:
 182       assert(0 && "bad texture target in texture_dims()");
 183       return 2;
 184    }
 185 }
 186
 187
 188
 189 /**
 190  * Generate code to fetch a texel from a texture at int coords (x, y, z).
 191  * The computation depends on whether the texture is 1D, 2D or 3D.
 192  * The result, texel, will be:
 193  *   texel[0] = red values
 194  *   texel[1] = green values
 195  *   texel[2] = blue values
 196  *   texel[3] = alpha values
 197  */
 198 static void
 199 lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
 200                           LLVMValueRef width,
 201                           LLVMValueRef height,
 202                           LLVMValueRef depth,
 203                           LLVMValueRef x,
 204                           LLVMValueRef y,
 205                           LLVMValueRef z,
 206                           LLVMValueRef y_stride,
 207                           LLVMValueRef z_stride,
 208                           LLVMValueRef data_ptr,
 209                           LLVMValueRef *texel)
 210 {
 211    const int dims = texture_dims(bld->static_state->target);
 212    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 213    LLVMValueRef offset;
 214    LLVMValueRef i, j;
 215    LLVMValueRef use_border = NULL;
 216
 217    /* use_border = x < 0 || x >= width || y < 0 || y >= height */
 218    if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) {
 219       LLVMValueRef b1, b2;
 220       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
 221       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
 222       use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
 223    }
 224
 225    if (dims >= 2 && wrap_mode_uses_border_color(bld->static_state->wrap_t)) {
 226       LLVMValueRef b1, b2;
 227       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
 228       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
 229       if (use_border) {
 230          use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
 231          use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
 232       }
 233       else {
 234          use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
 235       }
 236    }
 237
 238    if (dims == 3 && wrap_mode_uses_border_color(bld->static_state->wrap_r)) {
 239       LLVMValueRef b1, b2;
 240       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
 241       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
 242       if (use_border) {
 243          use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
 244          use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
 245       }
 246       else {
 247          use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
 248       }
 249    }
 250
 251    /*
 252     * Describe the coordinates in terms of pixel blocks.
 253     *
 254     * TODO: pixel blocks are power of two. LLVM should convert rem/div to
 255     * bit arithmetic. Verify this.
 256     */
 257
 258    if (bld->format_desc->block.width == 1) {
 259       i = bld->uint_coord_bld.zero;
 260    }
 261    else {
 262       LLVMValueRef block_width = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.width);
 263       i = LLVMBuildURem(bld->builder, x, block_width, "");
 264       x = LLVMBuildUDiv(bld->builder, x, block_width, "");
 265    }
 266
 267    if (bld->format_desc->block.height == 1) {
 268       j = bld->uint_coord_bld.zero;
 269    }
 270    else {
 271       LLVMValueRef block_height = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.height);
 272       j = LLVMBuildURem(bld->builder, y, block_height, "");
 273       y = LLVMBuildUDiv(bld->builder, y, block_height, "");
 274    }
 275
 276    /* convert x,y,z coords to linear offset from start of texture, in bytes */
 277    offset = lp_build_sample_offset(&bld->uint_coord_bld,
 278                                    bld->format_desc,
 279                                    x, y, z, y_stride, z_stride);
 280
 281    lp_build_fetch_rgba_soa(bld->builder,
 282                            bld->format_desc,
 283                            bld->texel_type,
 284                            data_ptr, offset,
 285                            i, j,
 286                            texel);
 287
 288    /*
 289     * Note: if we find an app which frequently samples the texture border
 290     * we might want to implement a true conditional here to avoid sampling
 291     * the texture whenever possible (since that's quite a bit of code).
 292     * Ex:
 293     *   if (use_border) {
 294     *      texel = border_color;
 295     *   }
 296     *   else {
 297     *      texel = sample_texture(coord);
 298     *   }
 299     * As it is now, we always sample the texture, then selectively replace
 300     * the texel color results with the border color.
 301     */
 302
 303    if (use_border) {
 304       /* select texel color or border color depending on use_border */
 305       int chan;
 306       for (chan = 0; chan < 4; chan++) {
 307          LLVMValueRef border_chan =
 308             lp_build_const_vec(bld->texel_type,
 309                                   bld->static_state->border_color[chan]);
 310          texel[chan] = lp_build_select(&bld->texel_bld, use_border,
 311                                        border_chan, texel[chan]);
 312       }
 313    }
 314 }
 315
 316
 317 static LLVMValueRef
 318 lp_build_sample_packed(struct lp_build_sample_context *bld,
 319                        LLVMValueRef x,
 320                        LLVMValueRef y,
 321                        LLVMValueRef y_stride,
 322                        LLVMValueRef data_array)
 323 {
 324    LLVMValueRef offset;
 325    LLVMValueRef data_ptr;
 326
 327    offset = lp_build_sample_offset(&bld->uint_coord_bld,
 328                                    bld->format_desc,
 329                                    x, y, NULL, y_stride, NULL);
 330
 331    assert(bld->format_desc->block.width == 1);
 332    assert(bld->format_desc->block.height == 1);
 333    assert(bld->format_desc->block.bits <= bld->texel_type.width);
 334
 335    /* get pointer to mipmap level 0 data */
 336    data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
 337
 338    return lp_build_gather(bld->builder,
 339                           bld->texel_type.length,
 340                           bld->format_desc->block.bits,
 341                           bld->texel_type.width,
 342                           data_ptr, offset);
 343 }
 344
 345
 346 /**
 347  * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
 348  */
 349 static LLVMValueRef
 350 lp_build_coord_mirror(struct lp_build_sample_context *bld,
 351                       LLVMValueRef coord)
 352 {
 353    struct lp_build_context *coord_bld = &bld->coord_bld;
 354    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 355    LLVMValueRef fract, flr, isOdd;
 356
 357    /* fract = coord - floor(coord) */
 358    fract = lp_build_sub(coord_bld, coord, lp_build_floor(coord_bld, coord));
 359
 360    /* flr = ifloor(coord); */
 361    flr = lp_build_ifloor(coord_bld, coord);
 362
 363    /* isOdd = flr & 1 */
 364    isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, "");
 365
 366    /* make coord positive or negative depending on isOdd */
 367    coord = lp_build_set_sign(coord_bld, fract, isOdd);
 368
 369    /* convert isOdd to float */
 370    isOdd = lp_build_int_to_float(coord_bld, isOdd);
 371
 372    /* add isOdd to coord */
 373    coord = lp_build_add(coord_bld, coord, isOdd);
 374
 375    return coord;
 376 }
 377
 378
 379 /**
 380  * We only support a few wrap modes in lp_build_sample_wrap_int() at this time.
 381  * Return whether the given mode is supported by that function.
 382  */
 383 static boolean
 384 is_simple_wrap_mode(unsigned mode)
 385 {
 386    switch (mode) {
 387    case PIPE_TEX_WRAP_REPEAT:
 388    case PIPE_TEX_WRAP_CLAMP:
 389    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 390       return TRUE;
 391    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 392    default:
 393       return FALSE;
 394    }
 395 }
 396
 397
 398 /**
 399  * Build LLVM code for texture wrap mode, for scaled integer texcoords.
 400  * \param coord  the incoming texcoord (s,t,r or q) scaled to the texture size
 401  * \param length  the texture size along one dimension
 402  * \param is_pot  if TRUE, length is a power of two
 403  * \param wrap_mode  one of PIPE_TEX_WRAP_x
 404  */
 405 static LLVMValueRef
 406 lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
 407                          LLVMValueRef coord,
 408                          LLVMValueRef length,
 409                          boolean is_pot,
 410                          unsigned wrap_mode)
 411 {
 412    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 413    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 414    LLVMValueRef length_minus_one;
 415
 416    length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
 417
 418    switch(wrap_mode) {
 419    case PIPE_TEX_WRAP_REPEAT:
 420       if(is_pot)
 421          coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
 422       else
 423          /* Signed remainder won't give the right results for negative
 424           * dividends but unsigned remainder does.*/
 425          coord = LLVMBuildURem(bld->builder, coord, length, "");
 426       break;
 427
 428    case PIPE_TEX_WRAP_CLAMP:
 429    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 430    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 431       coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
 432       coord = lp_build_min(int_coord_bld, coord, length_minus_one);
 433       break;
 434
 435    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 436    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 437    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 438    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 439       /* FIXME */
 440       _debug_printf("llvmpipe: failed to translate texture wrap mode %s\n",
 441                     util_dump_tex_wrap(wrap_mode, TRUE));
 442       coord = lp_build_max(uint_coord_bld, coord, uint_coord_bld->zero);
 443       coord = lp_build_min(uint_coord_bld, coord, length_minus_one);
 444       break;
 445
 446    default:
 447       assert(0);
 448    }
 449
 450    return coord;
 451 }
 452
 453
 454 /**
 455  * Build LLVM code for texture wrap mode for linear filtering.
 456  * \param x0_out  returns first integer texcoord
 457  * \param x1_out  returns second integer texcoord
 458  * \param weight_out  returns linear interpolation weight
 459  */
 460 static void
 461 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
 462                             LLVMValueRef coord,
 463                             LLVMValueRef length,
 464                             boolean is_pot,
 465                             unsigned wrap_mode,
 466                             LLVMValueRef *x0_out,
 467                             LLVMValueRef *x1_out,
 468                             LLVMValueRef *weight_out)
 469 {
 470    struct lp_build_context *coord_bld = &bld->coord_bld;
 471    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 472    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 473    LLVMValueRef two = lp_build_const_vec(coord_bld->type, 2.0);
 474    LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
 475    LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
 476    LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
 477    LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
 478    LLVMValueRef coord0, coord1, weight;
 479
 480    switch(wrap_mode) {
 481    case PIPE_TEX_WRAP_REPEAT:
 482       /* mul by size and subtract 0.5 */
 483       coord = lp_build_mul(coord_bld, coord, length_f);
 484       coord = lp_build_sub(coord_bld, coord, half);
 485       /* convert to int */
 486       coord0 = lp_build_ifloor(coord_bld, coord);
 487       coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one);
 488       /* compute lerp weight */
 489       weight = lp_build_fract(coord_bld, coord);
 490       /* repeat wrap */
 491       if (is_pot) {
 492          coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
 493          coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, "");
 494       }
 495       else {
 496          /* Signed remainder won't give the right results for negative
 497           * dividends but unsigned remainder does.*/
 498          coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
 499          coord1 = LLVMBuildURem(bld->builder, coord1, length, "");
 500       }
 501       break;
 502
 503    case PIPE_TEX_WRAP_CLAMP:
 504       if (bld->static_state->normalized_coords) {
 505          coord = lp_build_mul(coord_bld, coord, length_f);
 506       }
 507       weight = lp_build_fract(coord_bld, coord);
 508       coord0 = lp_build_clamp(coord_bld, coord, coord_bld->zero,
 509                               length_f_minus_one);
 510       coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
 511       coord1 = lp_build_clamp(coord_bld, coord1, coord_bld->zero,
 512                               length_f_minus_one);
 513       coord0 = lp_build_ifloor(coord_bld, coord0);
 514       coord1 = lp_build_ifloor(coord_bld, coord1);
 515       break;
 516
 517    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 518       if (bld->static_state->normalized_coords) {
 519          /* clamp to [0,1] */
 520          coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one);
 521          /* mul by tex size and subtract 0.5 */
 522          coord = lp_build_mul(coord_bld, coord, length_f);
 523          coord = lp_build_sub(coord_bld, coord, half);
 524       }
 525       else {
 526          LLVMValueRef min, max;
 527          /* clamp to [0.5, length - 0.5] */
 528          min = lp_build_const_vec(coord_bld->type, 0.5F);
 529          max = lp_build_sub(coord_bld, length_f, min);
 530          coord = lp_build_clamp(coord_bld, coord, min, max);
 531       }
 532       /* compute lerp weight */
 533       weight = lp_build_fract(coord_bld, coord);
 534       /* coord0 = floor(coord); */
 535       coord0 = lp_build_ifloor(coord_bld, coord);
 536       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 537       /* coord0 = max(coord0, 0) */
 538       coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
 539       /* coord1 = min(coord1, length-1) */
 540       coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
 541       break;
 542
 543    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 544       {
 545          LLVMValueRef min, max;
 546          if (bld->static_state->normalized_coords) {
 547             /* min = -1.0 / (2 * length) = -0.5 / length */
 548             min = lp_build_mul(coord_bld,
 549                                lp_build_const_vec(coord_bld->type, -0.5F),
 550                                lp_build_rcp(coord_bld, length_f));
 551             /* max = 1.0 - min */
 552             max = lp_build_sub(coord_bld, coord_bld->one, min);
 553             /* coord = clamp(coord, min, max) */
 554             coord = lp_build_clamp(coord_bld, coord, min, max);
 555             /* scale coord to length (and sub 0.5?) */
 556             coord = lp_build_mul(coord_bld, coord, length_f);
 557             coord = lp_build_sub(coord_bld, coord, half);
 558          }
 559          else {
 560             /* clamp to [-0.5, length + 0.5] */
 561             min = lp_build_const_vec(coord_bld->type, -0.5F);
 562             max = lp_build_sub(coord_bld, length_f, min);
 563             coord = lp_build_clamp(coord_bld, coord, min, max);
 564             coord = lp_build_sub(coord_bld, coord, half);
 565          }
 566          /* compute lerp weight */
 567          weight = lp_build_fract(coord_bld, coord);
 568          /* convert to int */
 569          coord0 = lp_build_ifloor(coord_bld, coord);
 570          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 571       }
 572       break;
 573
 574    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 575       /* compute mirror function */
 576       coord = lp_build_coord_mirror(bld, coord);
 577
 578       /* scale coord to length */
 579       coord = lp_build_mul(coord_bld, coord, length_f);
 580       coord = lp_build_sub(coord_bld, coord, half);
 581
 582       /* compute lerp weight */
 583       weight = lp_build_fract(coord_bld, coord);
 584
 585       /* convert to int coords */
 586       coord0 = lp_build_ifloor(coord_bld, coord);
 587       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 588
 589       /* coord0 = max(coord0, 0) */
 590       coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
 591       /* coord1 = min(coord1, length-1) */
 592       coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
 593       break;
 594
 595    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 596       {
 597          LLVMValueRef min, max;
 598          /* min = 1.0 / (2 * length) */
 599          min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
 600          /* max = 1.0 - min */
 601          max = lp_build_sub(coord_bld, coord_bld->one, min);
 602
 603          coord = lp_build_abs(coord_bld, coord);
 604          coord = lp_build_clamp(coord_bld, coord, min, max);
 605          coord = lp_build_mul(coord_bld, coord, length_f);
 606          if(0)coord = lp_build_sub(coord_bld, coord, half);
 607          weight = lp_build_fract(coord_bld, coord);
 608          coord0 = lp_build_ifloor(coord_bld, coord);
 609          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 610       }
 611       break;
 612
 613    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 614       {
 615          LLVMValueRef min, max;
 616          /* min = 1.0 / (2 * length) */
 617          min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
 618          /* max = 1.0 - min */
 619          max = lp_build_sub(coord_bld, coord_bld->one, min);
 620
 621          coord = lp_build_abs(coord_bld, coord);
 622          coord = lp_build_clamp(coord_bld, coord, min, max);
 623          coord = lp_build_mul(coord_bld, coord, length_f);
 624          coord = lp_build_sub(coord_bld, coord, half);
 625          weight = lp_build_fract(coord_bld, coord);
 626          coord0 = lp_build_ifloor(coord_bld, coord);
 627          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 628       }
 629       break;
 630
 631    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 632       {
 633          LLVMValueRef min, max;
 634          /* min = -1.0 / (2 * length) = -0.5 / length */
 635          min = lp_build_mul(coord_bld,
 636                             lp_build_const_vec(coord_bld->type, -0.5F),
 637                             lp_build_rcp(coord_bld, length_f));
 638          /* max = 1.0 - min */
 639          max = lp_build_sub(coord_bld, coord_bld->one, min);
 640
 641          coord = lp_build_abs(coord_bld, coord);
 642          coord = lp_build_clamp(coord_bld, coord, min, max);
 643          coord = lp_build_mul(coord_bld, coord, length_f);
 644          coord = lp_build_sub(coord_bld, coord, half);
 645          weight = lp_build_fract(coord_bld, coord);
 646          coord0 = lp_build_ifloor(coord_bld, coord);
 647          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 648       }
 649       break;
 650
 651    default:
 652       assert(0);
 653       coord0 = NULL;
 654       coord1 = NULL;
 655       weight = NULL;
 656    }
 657
 658    *x0_out = coord0;
 659    *x1_out = coord1;
 660    *weight_out = weight;
 661 }
 662
 663
 664 /**
 665  * Build LLVM code for texture wrap mode for nearest filtering.
 666  * \param coord  the incoming texcoord (nominally in [0,1])
 667  * \param length  the texture size along one dimension, as int
 668  * \param is_pot  if TRUE, length is a power of two
 669  * \param wrap_mode  one of PIPE_TEX_WRAP_x
 670  */
 671 static LLVMValueRef
 672 lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
 673                              LLVMValueRef coord,
 674                              LLVMValueRef length,
 675                              boolean is_pot,
 676                              unsigned wrap_mode)
 677 {
 678    struct lp_build_context *coord_bld = &bld->coord_bld;
 679    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 680    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 681    LLVMValueRef two = lp_build_const_vec(coord_bld->type, 2.0);
 682    LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
 683    LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
 684    LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
 685    LLVMValueRef icoord;
 686
 687    switch(wrap_mode) {
 688    case PIPE_TEX_WRAP_REPEAT:
 689       coord = lp_build_mul(coord_bld, coord, length_f);
 690       icoord = lp_build_ifloor(coord_bld, coord);
 691       if (is_pot)
 692          icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, "");
 693       else
 694          /* Signed remainder won't give the right results for negative
 695           * dividends but unsigned remainder does.*/
 696          icoord = LLVMBuildURem(bld->builder, icoord, length, "");
 697       break;
 698
 699    case PIPE_TEX_WRAP_CLAMP:
 700       /* mul by size */
 701       if (bld->static_state->normalized_coords) {
 702          coord = lp_build_mul(coord_bld, coord, length_f);
 703       }
 704       /* floor */
 705       icoord = lp_build_ifloor(coord_bld, coord);
 706       /* clamp to [0, size-1].  Note: int coord builder type */
 707       icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
 708                               length_minus_one);
 709       break;
 710
 711    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 712       {
 713          LLVMValueRef min, max;
 714          if (bld->static_state->normalized_coords) {
 715             /* min = 1.0 / (2 * length) */
 716             min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
 717             /* max = length - min */
 718             max = lp_build_sub(coord_bld, length_f, min);
 719             /* scale coord to length */
 720             coord = lp_build_mul(coord_bld, coord, length_f);
 721          }
 722          else {
 723             /* clamp to [0.5, length - 0.5] */
 724             min = lp_build_const_vec(coord_bld->type, 0.5F);
 725             max = lp_build_sub(coord_bld, length_f, min);
 726          }
 727          /* coord = clamp(coord, min, max) */
 728          coord = lp_build_clamp(coord_bld, coord, min, max);
 729          icoord = lp_build_ifloor(coord_bld, coord);
 730       }
 731       break;
 732
 733    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 734       /* Note: this is the same as CLAMP_TO_EDGE, except min = -min */
 735       {
 736          LLVMValueRef min, max;
 737          if (bld->static_state->normalized_coords) {
 738             /* min = -1.0 / (2 * length) = -0.5 / length */
 739             min = lp_build_mul(coord_bld,
 740                                lp_build_const_vec(coord_bld->type, -0.5F),
 741                                lp_build_rcp(coord_bld, length_f));
 742             /* max = length - min */
 743             max = lp_build_sub(coord_bld, length_f, min);
 744             /* scale coord to length */
 745             coord = lp_build_mul(coord_bld, coord, length_f);
 746          }
 747          else {
 748             /* clamp to [-0.5, length + 0.5] */
 749             min = lp_build_const_vec(coord_bld->type, -0.5F);
 750             max = lp_build_sub(coord_bld, length_f, min);
 751          }
 752          /* coord = clamp(coord, min, max) */
 753          coord = lp_build_clamp(coord_bld, coord, min, max);
 754          icoord = lp_build_ifloor(coord_bld, coord);
 755       }
 756       break;
 757
 758    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 759       {
 760          LLVMValueRef min, max;
 761          /* min = 1.0 / (2 * length) */
 762          min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
 763          /* max = length - min */
 764          max = lp_build_sub(coord_bld, length_f, min);
 765
 766          /* compute mirror function */
 767          coord = lp_build_coord_mirror(bld, coord);
 768
 769          /* scale coord to length */
 770          coord = lp_build_mul(coord_bld, coord, length_f);
 771
 772          /* coord = clamp(coord, min, max) */
 773          coord = lp_build_clamp(coord_bld, coord, min, max);
 774          icoord = lp_build_ifloor(coord_bld, coord);
 775       }
 776       break;
 777
 778    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 779       coord = lp_build_abs(coord_bld, coord);
 780       coord = lp_build_mul(coord_bld, coord, length_f);
 781       coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f_minus_one);
 782       icoord = lp_build_ifloor(coord_bld, coord);
 783       break;
 784
 785    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 786       {
 787          LLVMValueRef min, max;
 788          /* min = 1.0 / (2 * length) */
 789          min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
 790          /* max = length - min */
 791          max = lp_build_sub(coord_bld, length_f, min);
 792
 793          coord = lp_build_abs(coord_bld, coord);
 794          coord = lp_build_mul(coord_bld, coord, length_f);
 795          coord = lp_build_clamp(coord_bld, coord, min, max);
 796          icoord = lp_build_ifloor(coord_bld, coord);
 797       }
 798       break;
 799
 800    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 801       {
 802          LLVMValueRef min, max;
 803          /* min = 1.0 / (2 * length) */
 804          min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
 805          min = lp_build_negate(coord_bld, min);
 806          /* max = length - min */
 807          max = lp_build_sub(coord_bld, length_f, min);
 808
 809          coord = lp_build_abs(coord_bld, coord);
 810          coord = lp_build_mul(coord_bld, coord, length_f);
 811          coord = lp_build_clamp(coord_bld, coord, min, max);
 812          icoord = lp_build_ifloor(coord_bld, coord);
 813       }
 814       break;
 815
 816    default:
 817       assert(0);
 818       icoord = NULL;
 819    }
 820
 821    return icoord;
 822 }
 823
 824
 825 /**
 826  * Codegen equivalent for u_minify().
 827  * Return max(1, base_size >> level);
 828  */
 829 static LLVMValueRef
 830 lp_build_minify(struct lp_build_sample_context *bld,
 831                 LLVMValueRef base_size,
 832                 LLVMValueRef level)
 833 {
 834    LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify");
 835    size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
 836    return size;
 837 }
 838
 839
 840 /**
 841  * Generate code to compute texture level of detail (lambda).
 842  * \param s  vector of texcoord s values
 843  * \param t  vector of texcoord t values
 844  * \param r  vector of texcoord r values
 845  * \param width  scalar int texture width
 846  * \param height  scalar int texture height
 847  * \param depth  scalar int texture depth
 848  */
 849 static LLVMValueRef
 850 lp_build_lod_selector(struct lp_build_sample_context *bld,
 851                       LLVMValueRef s,
 852                       LLVMValueRef t,
 853                       LLVMValueRef r,
 854                       LLVMValueRef width,
 855                       LLVMValueRef height,
 856                       LLVMValueRef depth)
 857
 858 {
 859    if (bld->static_state->min_lod == bld->static_state->max_lod) {
 860       /* User is forcing sampling from a particular mipmap level.
 861        * This is hit during mipmap generation.
 862        */
 863       return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
 864    }
 865    else {
 866       const int dims = texture_dims(bld->static_state->target);
 867       struct lp_build_context *float_bld = &bld->float_bld;
 868       LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(),
 869                                             bld->static_state->lod_bias);
 870       LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(),
 871                                            bld->static_state->min_lod);
 872       LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
 873                                            bld->static_state->max_lod);
 874
 875       LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
 876       LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
 877       LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
 878
 879       LLVMValueRef s0, s1, s2;
 880       LLVMValueRef t0, t1, t2;
 881       LLVMValueRef r0, r1, r2;
 882       LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
 883       LLVMValueRef rho, lod;
 884
 885       /*
 886        * dsdx = abs(s[1] - s[0]);
 887        * dsdy = abs(s[2] - s[0]);
 888        * dtdx = abs(t[1] - t[0]);
 889        * dtdy = abs(t[2] - t[0]);
 890        * drdx = abs(r[1] - r[0]);
 891        * drdy = abs(r[2] - r[0]);
 892        * XXX we're assuming a four-element quad in 2x2 layout here.
 893        */
 894       s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
 895       s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
 896       s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
 897       dsdx = LLVMBuildSub(bld->builder, s1, s0, "");
 898       dsdx = lp_build_abs(float_bld, dsdx);
 899       dsdy = LLVMBuildSub(bld->builder, s2, s0, "");
 900       dsdy = lp_build_abs(float_bld, dsdy);
 901       if (dims > 1) {
 902          t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
 903          t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
 904          t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
 905          dtdx = LLVMBuildSub(bld->builder, t1, t0, "");
 906          dtdx = lp_build_abs(float_bld, dtdx);
 907          dtdy = LLVMBuildSub(bld->builder, t2, t0, "");
 908          dtdy = lp_build_abs(float_bld, dtdy);
 909          if (dims > 2) {
 910             r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
 911             r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
 912             r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
 913             drdx = LLVMBuildSub(bld->builder, r1, r0, "");
 914             drdx = lp_build_abs(float_bld, drdx);
 915             drdy = LLVMBuildSub(bld->builder, r2, r0, "");
 916             drdy = lp_build_abs(float_bld, drdy);
 917          }
 918       }
 919
 920       /* Compute rho = max of all partial derivatives scaled by texture size.
 921        * XXX this could be vectorized somewhat
 922        */
 923       rho = LLVMBuildMul(bld->builder,
 924                          lp_build_max(float_bld, dsdx, dsdy),
 925                          lp_build_int_to_float(float_bld, width), "");
 926       if (dims > 1) {
 927          LLVMValueRef max;
 928          max = LLVMBuildMul(bld->builder,
 929                             lp_build_max(float_bld, dtdx, dtdy),
 930                             lp_build_int_to_float(float_bld, height), "");
 931          rho = lp_build_max(float_bld, rho, max);
 932          if (dims > 2) {
 933             max = LLVMBuildMul(bld->builder,
 934                                lp_build_max(float_bld, drdx, drdy),
 935                                lp_build_int_to_float(float_bld, depth), "");
 936             rho = lp_build_max(float_bld, rho, max);
 937          }
 938       }
 939
 940       /* compute lod = log2(rho) */
 941       lod = lp_build_log2(float_bld, rho);
 942
 943       /* add lod bias */
 944       lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias");
 945
 946       /* clamp lod */
 947       lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
 948
 949       return lod;
 950    }
 951 }
 952
 953
 954 /**
 955  * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
 956  * mipmap level index.
 957  * Note: this is all scalar code.
 958  * \param lod  scalar float texture level of detail
 959  * \param level_out  returns integer
 960  */
 961 static void
 962 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
 963                            unsigned unit,
 964                            LLVMValueRef lod,
 965                            LLVMValueRef *level_out)
 966 {
 967    struct lp_build_context *float_bld = &bld->float_bld;
 968    struct lp_build_context *int_bld = &bld->int_bld;
 969    LLVMValueRef last_level, level;
 970
 971    LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
 972
 973    last_level = bld->dynamic_state->last_level(bld->dynamic_state,
 974                                                bld->builder, unit);
 975
 976    /* convert float lod to integer */
 977    level = lp_build_iround(float_bld, lod);
 978
 979    /* clamp level to legal range of levels */
 980    *level_out = lp_build_clamp(int_bld, level, zero, last_level);
 981 }
 982
 983
 984 /**
 985  * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
 986  * two (adjacent) mipmap level indexes.  Later, we'll sample from those
 987  * two mipmap levels and interpolate between them.
 988  */
 989 static void
 990 lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
 991                            unsigned unit,
 992                            LLVMValueRef lod,
 993                            LLVMValueRef *level0_out,
 994                            LLVMValueRef *level1_out,
 995                            LLVMValueRef *weight_out)
 996 {
 997    struct lp_build_context *float_bld = &bld->float_bld;
 998    struct lp_build_context *int_bld = &bld->int_bld;
 999    LLVMValueRef last_level, level;
1000
1001    last_level = bld->dynamic_state->last_level(bld->dynamic_state,
1002                                                bld->builder, unit);
1003
1004    /* convert float lod to integer */
1005    level = lp_build_ifloor(float_bld, lod);
1006
1007    /* compute level 0 and clamp to legal range of levels */
1008    *level0_out = lp_build_clamp(int_bld, level,
1009                                 int_bld->zero,
1010                                 last_level);
1011    /* compute level 1 and clamp to legal range of levels */
1012    *level1_out = lp_build_add(int_bld, *level0_out, int_bld->one);
1013    *level1_out = lp_build_min(int_bld, *level1_out, last_level);
1014
1015    *weight_out = lp_build_fract(float_bld, lod);
1016 }
1017
1018
1019 /**
1020  * Generate code to sample a mipmap level with nearest filtering.
1021  * If sampling a cube texture, r = cube face in [0,5].
1022  */
1023 static void
1024 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
1025                               LLVMValueRef width_vec,
1026                               LLVMValueRef height_vec,
1027                               LLVMValueRef depth_vec,
1028                               LLVMValueRef row_stride_vec,
1029                               LLVMValueRef img_stride_vec,
1030                               LLVMValueRef data_ptr,
1031                               LLVMValueRef s,
1032                               LLVMValueRef t,
1033                               LLVMValueRef r,
1034                               LLVMValueRef colors_out[4])
1035 {
1036    const int dims = texture_dims(bld->static_state->target);
1037    LLVMValueRef x, y, z;
1038
1039    /*
1040     * Compute integer texcoords.
1041     */
1042    x = lp_build_sample_wrap_nearest(bld, s, width_vec,
1043                                     bld->static_state->pot_width,
1044                                     bld->static_state->wrap_s);
1045    lp_build_name(x, "tex.x.wrapped");
1046
1047    if (dims >= 2) {
1048       y = lp_build_sample_wrap_nearest(bld, t, height_vec,
1049                                        bld->static_state->pot_height,
1050                                        bld->static_state->wrap_t);
1051       lp_build_name(y, "tex.y.wrapped");
1052
1053       if (dims == 3) {
1054          z = lp_build_sample_wrap_nearest(bld, r, depth_vec,
1055                                           bld->static_state->pot_height,
1056                                           bld->static_state->wrap_r);
1057          lp_build_name(z, "tex.z.wrapped");
1058       }
1059       else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1060          z = r;
1061       }
1062       else {
1063          z = NULL;
1064       }
1065    }
1066    else {
1067       y = z = NULL;
1068    }
1069
1070    /*
1071     * Get texture colors.
1072     */
1073    lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1074                              x, y, z,
1075                              row_stride_vec, img_stride_vec,
1076                              data_ptr, colors_out);
1077 }
1078
1079
1080 /**
1081  * Generate code to sample a mipmap level with linear filtering.
1082  * If sampling a cube texture, r = cube face in [0,5].
1083  */
1084 static void
1085 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
1086                              LLVMValueRef width_vec,
1087                              LLVMValueRef height_vec,
1088                              LLVMValueRef depth_vec,
1089                              LLVMValueRef row_stride_vec,
1090                              LLVMValueRef img_stride_vec,
1091                              LLVMValueRef data_ptr,
1092                              LLVMValueRef s,
1093                              LLVMValueRef t,
1094                              LLVMValueRef r,
1095                              LLVMValueRef colors_out[4])
1096 {
1097    const int dims = texture_dims(bld->static_state->target);
1098    LLVMValueRef x0, y0, z0, x1, y1, z1;
1099    LLVMValueRef s_fpart, t_fpart, r_fpart;
1100    LLVMValueRef neighbors[2][2][4];
1101    int chan;
1102
1103    /*
1104     * Compute integer texcoords.
1105     */
1106    lp_build_sample_wrap_linear(bld, s, width_vec,
1107                                bld->static_state->pot_width,
1108                                bld->static_state->wrap_s,
1109                                &x0, &x1, &s_fpart);
1110    lp_build_name(x0, "tex.x0.wrapped");
1111    lp_build_name(x1, "tex.x1.wrapped");
1112
1113    if (dims >= 2) {
1114       lp_build_sample_wrap_linear(bld, t, height_vec,
1115                                   bld->static_state->pot_height,
1116                                   bld->static_state->wrap_t,
1117                                   &y0, &y1, &t_fpart);
1118       lp_build_name(y0, "tex.y0.wrapped");
1119       lp_build_name(y1, "tex.y1.wrapped");
1120
1121       if (dims == 3) {
1122          lp_build_sample_wrap_linear(bld, r, depth_vec,
1123                                      bld->static_state->pot_depth,
1124                                      bld->static_state->wrap_r,
1125                                      &z0, &z1, &r_fpart);
1126          lp_build_name(z0, "tex.z0.wrapped");
1127          lp_build_name(z1, "tex.z1.wrapped");
1128       }
1129       else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1130          z0 = z1 = r;  /* cube face */
1131          r_fpart = NULL;
1132       }
1133       else {
1134          z0 = z1 = NULL;
1135          r_fpart = NULL;
1136       }
1137    }
1138    else {
1139       y0 = y1 = t_fpart = NULL;
1140       z0 = z1 = r_fpart = NULL;
1141    }
1142
1143    /*
1144     * Get texture colors.
1145     */
1146    /* get x0/x1 texels */
1147    lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1148                              x0, y0, z0,
1149                              row_stride_vec, img_stride_vec,
1150                              data_ptr, neighbors[0][0]);
1151    lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1152                              x1, y0, z0,
1153                              row_stride_vec, img_stride_vec,
1154                              data_ptr, neighbors[0][1]);
1155
1156    if (dims == 1) {
1157       /* Interpolate two samples from 1D image to produce one color */
1158       for (chan = 0; chan < 4; chan++) {
1159          colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
1160                                           neighbors[0][0][chan],
1161                                           neighbors[0][1][chan]);
1162       }
1163    }
1164    else {
1165       /* 2D/3D texture */
1166       LLVMValueRef colors0[4];
1167
1168       /* get x0/x1 texels at y1 */
1169       lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1170                                 x0, y1, z0,
1171                                 row_stride_vec, img_stride_vec,
1172                                 data_ptr, neighbors[1][0]);
1173       lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1174                                 x1, y1, z0,
1175                                 row_stride_vec, img_stride_vec,
1176                                 data_ptr, neighbors[1][1]);
1177
1178       /* Bilinear interpolate the four samples from the 2D image / 3D slice */
1179       for (chan = 0; chan < 4; chan++) {
1180          colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
1181                                           s_fpart, t_fpart,
1182                                           neighbors[0][0][chan],
1183                                           neighbors[0][1][chan],
1184                                           neighbors[1][0][chan],
1185                                           neighbors[1][1][chan]);
1186       }
1187
1188       if (dims == 3) {
1189          LLVMValueRef neighbors1[2][2][4];
1190          LLVMValueRef colors1[4];
1191
1192          /* get x0/x1/y0/y1 texels at z1 */
1193          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1194                                    x0, y0, z1,
1195                                    row_stride_vec, img_stride_vec,
1196                                    data_ptr, neighbors1[0][0]);
1197          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1198                                    x1, y0, z1,
1199                                    row_stride_vec, img_stride_vec,
1200                                    data_ptr, neighbors1[0][1]);
1201          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1202                                    x0, y1, z1,
1203                                    row_stride_vec, img_stride_vec,
1204                                    data_ptr, neighbors1[1][0]);
1205          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1206                                    x1, y1, z1,
1207                                    row_stride_vec, img_stride_vec,
1208                                    data_ptr, neighbors1[1][1]);
1209
1210          /* Bilinear interpolate the four samples from the second Z slice */
1211          for (chan = 0; chan < 4; chan++) {
1212             colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
1213                                              s_fpart, t_fpart,
1214                                              neighbors1[0][0][chan],
1215                                              neighbors1[0][1][chan],
1216                                              neighbors1[1][0][chan],
1217                                              neighbors1[1][1][chan]);
1218          }
1219
1220          /* Linearly interpolate the two samples from the two 3D slices */
1221          for (chan = 0; chan < 4; chan++) {
1222             colors_out[chan] = lp_build_lerp(&bld->texel_bld,
1223                                              r_fpart,
1224                                              colors0[chan], colors1[chan]);
1225          }
1226       }
1227       else {
1228          /* 2D tex */
1229          for (chan = 0; chan < 4; chan++) {
1230             colors_out[chan] = colors0[chan];
1231          }
1232       }
1233    }
1234 }
1235
1236
1237 /** Helper used by lp_build_cube_lookup() */
1238 static LLVMValueRef
1239 lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
1240 {
1241    /* ima = -0.5 / abs(coord); */
1242    LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5);
1243    LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
1244    LLVMValueRef ima = lp_build_mul(coord_bld, negHalf,
1245                                    lp_build_rcp(coord_bld, absCoord));
1246    return ima;
1247 }
1248
1249
1250 /**
1251  * Helper used by lp_build_cube_lookup()
1252  * \param sign  scalar +1 or -1
1253  * \param coord  float vector
1254  * \param ima  float vector
1255  */
1256 static LLVMValueRef
1257 lp_build_cube_coord(struct lp_build_context *coord_bld,
1258                     LLVMValueRef sign, int negate_coord,
1259                     LLVMValueRef coord, LLVMValueRef ima)
1260 {
1261    /* return negate(coord) * ima * sign + 0.5; */
1262    LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
1263    LLVMValueRef res;
1264
1265    assert(negate_coord == +1 || negate_coord == -1);
1266
1267    if (negate_coord == -1) {
1268       coord = lp_build_negate(coord_bld, coord);
1269    }
1270
1271    res = lp_build_mul(coord_bld, coord, ima);
1272    if (sign) {
1273       sign = lp_build_broadcast_scalar(coord_bld, sign);
1274       res = lp_build_mul(coord_bld, res, sign);
1275    }
1276    res = lp_build_add(coord_bld, res, half);
1277
1278    return res;
1279 }
1280
1281
1282 /** Helper used by lp_build_cube_lookup()
1283  * Return (major_coord >= 0) ? pos_face : neg_face;
1284  */
1285 static LLVMValueRef
1286 lp_build_cube_face(struct lp_build_sample_context *bld,
1287                    LLVMValueRef major_coord,
1288                    unsigned pos_face, unsigned neg_face)
1289 {
1290    LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
1291                                     major_coord,
1292                                     bld->float_bld.zero, "");
1293    LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0);
1294    LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0);
1295    LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, "");
1296    return res;
1297 }
1298
1299
1300
1301 /**
1302  * Generate code to do cube face selection and per-face texcoords.
1303  */
1304 static void
1305 lp_build_cube_lookup(struct lp_build_sample_context *bld,
1306                      LLVMValueRef s,
1307                      LLVMValueRef t,
1308                      LLVMValueRef r,
1309                      LLVMValueRef *face,
1310                      LLVMValueRef *face_s,
1311                      LLVMValueRef *face_t)
1312 {
1313    struct lp_build_context *float_bld = &bld->float_bld;
1314    struct lp_build_context *coord_bld = &bld->coord_bld;
1315    LLVMValueRef rx, ry, rz;
1316    LLVMValueRef arx, ary, arz;
1317    LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25);
1318    LLVMValueRef arx_ge_ary, arx_ge_arz;
1319    LLVMValueRef ary_ge_arx, ary_ge_arz;
1320    LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;
1321    LLVMValueRef rx_pos, ry_pos, rz_pos;
1322
1323    assert(bld->coord_bld.type.length == 4);
1324
1325    /*
1326     * Use the average of the four pixel's texcoords to choose the face.
1327     */
1328    rx = lp_build_mul(float_bld, c25,
1329                      lp_build_sum_vector(&bld->coord_bld, s));
1330    ry = lp_build_mul(float_bld, c25,
1331                      lp_build_sum_vector(&bld->coord_bld, t));
1332    rz = lp_build_mul(float_bld, c25,
1333                      lp_build_sum_vector(&bld->coord_bld, r));
1334
1335    arx = lp_build_abs(float_bld, rx);
1336    ary = lp_build_abs(float_bld, ry);
1337    arz = lp_build_abs(float_bld, rz);
1338
1339    /*
1340     * Compare sign/magnitude of rx,ry,rz to determine face
1341     */
1342    arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, "");
1343    arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, "");
1344    ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, "");
1345    ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, "");
1346
1347    arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, "");
1348    ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
1349
1350    rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, "");
1351    ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, "");
1352    rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, "");
1353
1354    {
1355       struct lp_build_flow_context *flow_ctx;
1356       struct lp_build_if_state if_ctx;
1357
1358       flow_ctx = lp_build_flow_create(bld->builder);
1359       lp_build_flow_scope_begin(flow_ctx);
1360
1361       *face_s = bld->coord_bld.undef;
1362       *face_t = bld->coord_bld.undef;
1363       *face = bld->int_bld.undef;
1364
1365       lp_build_name(*face_s, "face_s");
1366       lp_build_name(*face_t, "face_t");
1367       lp_build_name(*face, "face");
1368
1369       lp_build_flow_scope_declare(flow_ctx, face_s);
1370       lp_build_flow_scope_declare(flow_ctx, face_t);
1371       lp_build_flow_scope_declare(flow_ctx, face);
1372
1373       lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
1374       {
1375          /* +/- X face */
1376          LLVMValueRef sign = lp_build_sgn(float_bld, rx);
1377          LLVMValueRef ima = lp_build_cube_ima(coord_bld, s);
1378          *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
1379          *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
1380          *face = lp_build_cube_face(bld, rx,
1381                                     PIPE_TEX_FACE_POS_X,
1382                                     PIPE_TEX_FACE_NEG_X);
1383       }
1384       lp_build_else(&if_ctx);
1385       {
1386          struct lp_build_flow_context *flow_ctx2;
1387          struct lp_build_if_state if_ctx2;
1388
1389          LLVMValueRef face_s2 = bld->coord_bld.undef;
1390          LLVMValueRef face_t2 = bld->coord_bld.undef;
1391          LLVMValueRef face2 = bld->int_bld.undef;
1392
1393          flow_ctx2 = lp_build_flow_create(bld->builder);
1394          lp_build_flow_scope_begin(flow_ctx2);
1395          lp_build_flow_scope_declare(flow_ctx2, &face_s2);
1396          lp_build_flow_scope_declare(flow_ctx2, &face_t2);
1397          lp_build_flow_scope_declare(flow_ctx2, &face2);
1398
1399          ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
1400
1401          lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
1402          {
1403             /* +/- Y face */
1404             LLVMValueRef sign = lp_build_sgn(float_bld, ry);
1405             LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
1406             face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
1407             face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
1408             face2 = lp_build_cube_face(bld, ry,
1409                                        PIPE_TEX_FACE_POS_Y,
1410                                        PIPE_TEX_FACE_NEG_Y);
1411          }
1412          lp_build_else(&if_ctx2);
1413          {
1414             /* +/- Z face */
1415             LLVMValueRef sign = lp_build_sgn(float_bld, rz);
1416             LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
1417             face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
1418             face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
1419             face2 = lp_build_cube_face(bld, rz,
1420                                        PIPE_TEX_FACE_POS_Z,
1421                                        PIPE_TEX_FACE_NEG_Z);
1422          }
1423          lp_build_endif(&if_ctx2);
1424          lp_build_flow_scope_end(flow_ctx2);
1425          lp_build_flow_destroy(flow_ctx2);
1426
1427          *face_s = face_s2;
1428          *face_t = face_t2;
1429          *face = face2;
1430       }
1431
1432       lp_build_endif(&if_ctx);
1433       lp_build_flow_scope_end(flow_ctx);
1434       lp_build_flow_destroy(flow_ctx);
1435    }
1436 }
1437
1438
1439
1440 /**
1441  * Sample the texture/mipmap using given image filter and mip filter.
1442  * data0_ptr and data1_ptr point to the two mipmap levels to sample
1443  * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
1444  * If we're using nearest miplevel sampling the '1' values will be null/unused.
1445  */
1446 static void
1447 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1448                        unsigned img_filter,
1449                        unsigned mip_filter,
1450                        LLVMValueRef s,
1451                        LLVMValueRef t,
1452                        LLVMValueRef r,
1453                        LLVMValueRef lod_fpart,
1454                        LLVMValueRef width0_vec,
1455                        LLVMValueRef width1_vec,
1456                        LLVMValueRef height0_vec,
1457                        LLVMValueRef height1_vec,
1458                        LLVMValueRef depth0_vec,
1459                        LLVMValueRef depth1_vec,
1460                        LLVMValueRef row_stride0_vec,
1461                        LLVMValueRef row_stride1_vec,
1462                        LLVMValueRef img_stride0_vec,
1463                        LLVMValueRef img_stride1_vec,
1464                        LLVMValueRef data_ptr0,
1465                        LLVMValueRef data_ptr1,
1466                        LLVMValueRef *colors_out)
1467 {
1468    LLVMValueRef colors0[4], colors1[4];
1469    int chan;
1470
1471    if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1472       lp_build_sample_image_nearest(bld,
1473                                     width0_vec, height0_vec, depth0_vec,
1474                                     row_stride0_vec, img_stride0_vec,
1475                                     data_ptr0, s, t, r, colors0);
1476
1477       if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1478          /* sample the second mipmap level, and interp */
1479          lp_build_sample_image_nearest(bld,
1480                                        width1_vec, height1_vec, depth1_vec,
1481                                        row_stride1_vec, img_stride1_vec,
1482                                        data_ptr1, s, t, r, colors1);
1483       }
1484    }
1485    else {
1486       assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1487
1488       lp_build_sample_image_linear(bld,
1489                                    width0_vec, height0_vec, depth0_vec,
1490                                    row_stride0_vec, img_stride0_vec,
1491                                    data_ptr0, s, t, r, colors0);
1492
1493       if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1494          /* sample the second mipmap level, and interp */
1495          lp_build_sample_image_linear(bld,
1496                                       width1_vec, height1_vec, depth1_vec,
1497                                       row_stride1_vec, img_stride1_vec,
1498                                       data_ptr1, s, t, r, colors1);
1499       }
1500    }
1501
1502    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1503       /* interpolate samples from the two mipmap levels */
1504       for (chan = 0; chan < 4; chan++) {
1505          colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1506                                           colors0[chan], colors1[chan]);
1507       }
1508    }
1509    else {
1510       /* use first/only level's colors */
1511       for (chan = 0; chan < 4; chan++) {
1512          colors_out[chan] = colors0[chan];
1513       }
1514    }
1515 }
1516
1517
1518
1519 /**
1520  * General texture sampling codegen.
1521  * This function handles texture sampling for all texture targets (1D,
1522  * 2D, 3D, cube) and all filtering modes.
1523  */
1524 static void
1525 lp_build_sample_general(struct lp_build_sample_context *bld,
1526                         unsigned unit,
1527                         LLVMValueRef s,
1528                         LLVMValueRef t,
1529                         LLVMValueRef r,
1530                         LLVMValueRef width,
1531                         LLVMValueRef height,
1532                         LLVMValueRef depth,
1533                         LLVMValueRef width_vec,
1534                         LLVMValueRef height_vec,
1535                         LLVMValueRef depth_vec,
1536                         LLVMValueRef row_stride_array,
1537                         LLVMValueRef img_stride_array,
1538                         LLVMValueRef data_array,
1539                         LLVMValueRef *colors_out)
1540 {
1541    struct lp_build_context *float_bld = &bld->float_bld;
1542    const unsigned mip_filter = bld->static_state->min_mip_filter;
1543    const unsigned min_filter = bld->static_state->min_img_filter;
1544    const unsigned mag_filter = bld->static_state->mag_img_filter;
1545    const int dims = texture_dims(bld->static_state->target);
1546    LLVMValueRef lod = NULL, lod_fpart = NULL;
1547    LLVMValueRef ilevel0, ilevel1 = NULL, ilevel0_vec, ilevel1_vec = NULL;
1548    LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
1549    LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
1550    LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
1551    LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
1552    LLVMValueRef data_ptr0, data_ptr1 = NULL;
1553
1554    /*
1555    printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
1556           mip_filter, min_filter, mag_filter);
1557    */
1558
1559    /*
1560     * Compute the level of detail (float).
1561     */
1562    if (min_filter != mag_filter ||
1563        mip_filter != PIPE_TEX_MIPFILTER_NONE) {
1564       /* Need to compute lod either to choose mipmap levels or to
1565        * distinguish between minification/magnification with one mipmap level.
1566        */
1567       lod = lp_build_lod_selector(bld, s, t, r, width, height, depth);
1568    }
1569
1570    /*
1571     * Compute integer mipmap level(s) to fetch texels from.
1572     */
1573    if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
1574       /* always use mip level 0 */
1575       ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
1576    }
1577    else {
1578       if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
1579          lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
1580       }
1581       else {
1582          assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR);
1583          lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
1584                                     &lod_fpart);
1585          lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart);
1586       }
1587    }
1588
1589    /*
1590     * Convert scalar integer mipmap levels into vectors.
1591     */
1592    ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
1593    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
1594       ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
1595
1596    /*
1597     * Compute width, height at mipmap level 'ilevel0'
1598     */
1599    width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
1600    if (dims >= 2) {
1601       height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
1602       row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
1603                                                       ilevel0);
1604       if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
1605          img_stride0_vec = lp_build_get_level_stride_vec(bld,
1606                                                          img_stride_array,
1607                                                          ilevel0);
1608          if (dims == 3) {
1609             depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
1610          }
1611       }
1612    }
1613    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1614       /* compute width, height, depth for second mipmap level at 'ilevel1' */
1615       width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
1616       if (dims >= 2) {
1617          height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
1618          row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
1619                                                          ilevel1);
1620          if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
1621             img_stride1_vec = lp_build_get_level_stride_vec(bld,
1622                                                             img_stride_array,
1623                                                             ilevel1);
1624             if (dims ==3) {
1625                depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
1626             }
1627          }
1628       }
1629    }
1630
1631    /*
1632     * Choose cube face, recompute per-face texcoords.
1633     */
1634    if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1635       LLVMValueRef face, face_s, face_t;
1636       lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
1637       s = face_s; /* vec */
1638       t = face_t; /* vec */
1639       /* use 'r' to indicate cube face */
1640       r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
1641    }
1642
1643    /*
1644     * Get pointer(s) to image data for mipmap level(s).
1645     */
1646    data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
1647    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1648       data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
1649    }
1650
1651    /*
1652     * Get/interpolate texture colors.
1653     */
1654    if (min_filter == mag_filter) {
1655       /* no need to distinquish between minification and magnification */
1656       lp_build_sample_mipmap(bld, min_filter, mip_filter, s, t, r, lod_fpart,
1657                              width0_vec, width1_vec,
1658                              height0_vec, height1_vec,
1659                              depth0_vec, depth1_vec,
1660                              row_stride0_vec, row_stride1_vec,
1661                              img_stride0_vec, img_stride1_vec,
1662                              data_ptr0, data_ptr1,
1663                              colors_out);
1664    }
1665    else {
1666       /* Emit conditional to choose min image filter or mag image filter
1667        * depending on the lod being >0 or <= 0, respectively.
1668        */
1669       struct lp_build_flow_context *flow_ctx;
1670       struct lp_build_if_state if_ctx;
1671       LLVMValueRef minify;
1672
1673       flow_ctx = lp_build_flow_create(bld->builder);
1674       lp_build_flow_scope_begin(flow_ctx);
1675
1676       lp_build_flow_scope_declare(flow_ctx, &colors_out[0]);
1677       lp_build_flow_scope_declare(flow_ctx, &colors_out[1]);
1678       lp_build_flow_scope_declare(flow_ctx, &colors_out[2]);
1679       lp_build_flow_scope_declare(flow_ctx, &colors_out[3]);
1680
1681       /* minify = lod > 0.0 */
1682       minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
1683                              lod, float_bld->zero, "");
1684
1685       lp_build_if(&if_ctx, flow_ctx, bld->builder, minify);
1686       {
1687          /* Use the minification filter */
1688          lp_build_sample_mipmap(bld, min_filter, mip_filter,
1689                                 s, t, r, lod_fpart,
1690                                 width0_vec, width1_vec,
1691                                 height0_vec, height1_vec,
1692                                 depth0_vec, depth1_vec,
1693                                 row_stride0_vec, row_stride1_vec,
1694                                 img_stride0_vec, img_stride1_vec,
1695                                 data_ptr0, data_ptr1,
1696                                 colors_out);
1697       }
1698       lp_build_else(&if_ctx);
1699       {
1700          /* Use the magnification filter */
1701          lp_build_sample_mipmap(bld, mag_filter, mip_filter,
1702                                 s, t, r, lod_fpart,
1703                                 width0_vec, width1_vec,
1704                                 height0_vec, height1_vec,
1705                                 depth0_vec, depth1_vec,
1706                                 row_stride0_vec, row_stride1_vec,
1707                                 img_stride0_vec, img_stride1_vec,
1708                                 data_ptr0, data_ptr1,
1709                                 colors_out);
1710       }
1711       lp_build_endif(&if_ctx);
1712
1713       lp_build_flow_scope_end(flow_ctx);
1714       lp_build_flow_destroy(flow_ctx);
1715    }
1716 }
1717
1718
1719
1720 static void
1721 lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
1722                           struct lp_type dst_type,
1723                           LLVMValueRef packed,
1724                           LLVMValueRef *rgba)
1725 {
1726    LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff);
1727    unsigned chan;
1728
1729    /* Decode the input vector components */
1730    for (chan = 0; chan < 4; ++chan) {
1731       unsigned start = chan*8;
1732       unsigned stop = start + 8;
1733       LLVMValueRef input;
1734
1735       input = packed;
1736
1737       if(start)
1738          input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(dst_type, start), "");
1739
1740       if(stop < 32)
1741          input = LLVMBuildAnd(builder, input, mask, "");
1742
1743       input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
1744
1745       rgba[chan] = input;
1746    }
1747 }
1748
1749
1750 static void
1751 lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
1752                               LLVMValueRef s,
1753                               LLVMValueRef t,
1754                               LLVMValueRef width,
1755                               LLVMValueRef height,
1756                               LLVMValueRef stride_array,
1757                               LLVMValueRef data_array,
1758                               LLVMValueRef *texel)
1759 {
1760    LLVMBuilderRef builder = bld->builder;
1761    struct lp_build_context i32, h16, u8n;
1762    LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
1763    LLVMValueRef i32_c8, i32_c128, i32_c255;
1764    LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
1765    LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
1766    LLVMValueRef x0, x1;
1767    LLVMValueRef y0, y1;
1768    LLVMValueRef neighbors[2][2];
1769    LLVMValueRef neighbors_lo[2][2];
1770    LLVMValueRef neighbors_hi[2][2];
1771    LLVMValueRef packed, packed_lo, packed_hi;
1772    LLVMValueRef unswizzled[4];
1773    LLVMValueRef stride;
1774
1775    lp_build_context_init(&i32, builder, lp_type_int_vec(32));
1776    lp_build_context_init(&h16, builder, lp_type_ufixed(16));
1777    lp_build_context_init(&u8n, builder, lp_type_unorm(8));
1778
1779    i32_vec_type = lp_build_vec_type(i32.type);
1780    h16_vec_type = lp_build_vec_type(h16.type);
1781    u8n_vec_type = lp_build_vec_type(u8n.type);
1782
1783    if (bld->static_state->normalized_coords) {
1784       LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
1785       LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width, coord_vec_type, "");
1786       LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height, coord_vec_type, "");
1787       s = lp_build_mul(&bld->coord_bld, s, fp_width);
1788       t = lp_build_mul(&bld->coord_bld, t, fp_height);
1789    }
1790
1791    /* scale coords by 256 (8 fractional bits) */
1792    s = lp_build_mul_imm(&bld->coord_bld, s, 256);
1793    t = lp_build_mul_imm(&bld->coord_bld, t, 256);
1794
1795    /* convert float to int */
1796    s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
1797    t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
1798
1799    /* subtract 0.5 (add -128) */
1800    i32_c128 = lp_build_const_int_vec(i32.type, -128);
1801    s = LLVMBuildAdd(builder, s, i32_c128, "");
1802    t = LLVMBuildAdd(builder, t, i32_c128, "");
1803
1804    /* compute floor (shift right 8) */
1805    i32_c8 = lp_build_const_int_vec(i32.type, 8);
1806    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
1807    t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
1808
1809    /* compute fractional part (AND with 0xff) */
1810    i32_c255 = lp_build_const_int_vec(i32.type, 255);
1811    s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
1812    t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
1813
1814    x0 = s_ipart;
1815    y0 = t_ipart;
1816
1817    x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
1818    y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
1819
1820    x0 = lp_build_sample_wrap_int(bld, x0, width,  bld->static_state->pot_width,
1821                                  bld->static_state->wrap_s);
1822    y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height,
1823                                  bld->static_state->wrap_t);
1824
1825    x1 = lp_build_sample_wrap_int(bld, x1, width,  bld->static_state->pot_width,
1826                                  bld->static_state->wrap_s);
1827    y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height,
1828                                  bld->static_state->wrap_t);
1829
1830    /*
1831     * Transform 4 x i32 in
1832     *
1833     *   s_fpart = {s0, s1, s2, s3}
1834     *
1835     * into 8 x i16
1836     *
1837     *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
1838     *
1839     * into two 8 x i16
1840     *
1841     *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
1842     *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
1843     *
1844     * and likewise for t_fpart. There is no risk of loosing precision here
1845     * since the fractional parts only use the lower 8bits.
1846     */
1847
1848    s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
1849    t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
1850
1851    {
1852       LLVMTypeRef elem_type = LLVMInt32Type();
1853       LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
1854       LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
1855       LLVMValueRef shuffle_lo;
1856       LLVMValueRef shuffle_hi;
1857       unsigned i, j;
1858
1859       for(j = 0; j < h16.type.length; j += 4) {
1860          unsigned subindex = util_cpu_caps.little_endian ? 0 : 1;
1861          LLVMValueRef index;
1862
1863          index = LLVMConstInt(elem_type, j/2 + subindex, 0);
1864          for(i = 0; i < 4; ++i)
1865             shuffles_lo[j + i] = index;
1866
1867          index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
1868          for(i = 0; i < 4; ++i)
1869             shuffles_hi[j + i] = index;
1870       }
1871
1872       shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
1873       shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
1874
1875       s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, "");
1876       t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, "");
1877       s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, "");
1878       t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
1879    }
1880
1881    stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0);
1882
1883    /*
1884     * Fetch the pixels as 4 x 32bit (rgba order might differ):
1885     *
1886     *   rgba0 rgba1 rgba2 rgba3
1887     *
1888     * bit cast them into 16 x u8
1889     *
1890     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
1891     *
1892     * unpack them into two 8 x i16:
1893     *
1894     *   r0 g0 b0 a0 r1 g1 b1 a1
1895     *   r2 g2 b2 a2 r3 g3 b3 a3
1896     *
1897     * The higher 8 bits of the resulting elements will be zero.
1898     */
1899
1900    neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array);
1901    neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array);
1902    neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array);
1903    neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array);
1904
1905    neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
1906    neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
1907    neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
1908    neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
1909
1910    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
1911    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
1912    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
1913    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
1914
1915    /*
1916     * Linear interpolate with 8.8 fixed point.
1917     */
1918
1919    packed_lo = lp_build_lerp_2d(&h16,
1920                                 s_fpart_lo, t_fpart_lo,
1921                                 neighbors_lo[0][0],
1922                                 neighbors_lo[0][1],
1923                                 neighbors_lo[1][0],
1924                                 neighbors_lo[1][1]);
1925
1926    packed_hi = lp_build_lerp_2d(&h16,
1927                                 s_fpart_hi, t_fpart_hi,
1928                                 neighbors_hi[0][0],
1929                                 neighbors_hi[0][1],
1930                                 neighbors_hi[1][0],
1931                                 neighbors_hi[1][1]);
1932
1933    packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi);
1934
1935    /*
1936     * Convert to SoA and swizzle.
1937     */
1938
1939    packed = LLVMBuildBitCast(builder, packed, i32_vec_type, "");
1940
1941    lp_build_rgba8_to_f32_soa(bld->builder,
1942                              bld->texel_type,
1943                              packed, unswizzled);
1944
1945    lp_build_format_swizzle_soa(bld->format_desc,
1946                                bld->texel_type, unswizzled,
1947                                texel);
1948 }
1949
1950
1951 static void
1952 lp_build_sample_compare(struct lp_build_sample_context *bld,
1953                         LLVMValueRef p,
1954                         LLVMValueRef *texel)
1955 {
1956    struct lp_build_context *texel_bld = &bld->texel_bld;
1957    LLVMValueRef res;
1958    unsigned chan;
1959
1960    if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE)
1961       return;
1962
1963    /* TODO: Compare before swizzling, to avoid redundant computations */
1964    res = NULL;
1965    for(chan = 0; chan < 4; ++chan) {
1966       LLVMValueRef cmp;
1967       cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]);
1968       cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero);
1969
1970       if(res)
1971          res = lp_build_add(texel_bld, res, cmp);
1972       else
1973          res = cmp;
1974    }
1975
1976    assert(res);
1977    res = lp_build_mul(texel_bld, res, lp_build_const_vec(texel_bld->type, 0.25));
1978
1979    /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
1980    for(chan = 0; chan < 3; ++chan)
1981       texel[chan] = res;
1982    texel[3] = texel_bld->one;
1983 }
1984
1985
1986 /**
1987  * Build texture sampling code.
1988  * 'texel' will return a vector of four LLVMValueRefs corresponding to
1989  * R, G, B, A.
1990  * \param type  vector float type to use for coords, etc.
1991  */
1992 void
1993 lp_build_sample_soa(LLVMBuilderRef builder,
1994                     const struct lp_sampler_static_state *static_state,
1995                     struct lp_sampler_dynamic_state *dynamic_state,
1996                     struct lp_type type,
1997                     unsigned unit,
1998                     unsigned num_coords,
1999                     const LLVMValueRef *coords,
2000                     LLVMValueRef lodbias,
2001                     LLVMValueRef *texel)
2002 {
2003    struct lp_build_sample_context bld;
2004    LLVMValueRef width, width_vec;
2005    LLVMValueRef height, height_vec;
2006    LLVMValueRef depth, depth_vec;
2007    LLVMValueRef row_stride_array, img_stride_array;
2008    LLVMValueRef data_array;
2009    LLVMValueRef s;
2010    LLVMValueRef t;
2011    LLVMValueRef r;
2012
2013    /* Setup our build context */
2014    memset(&bld, 0, sizeof bld);
2015    bld.builder = builder;
2016    bld.static_state = static_state;
2017    bld.dynamic_state = dynamic_state;
2018    bld.format_desc = util_format_description(static_state->format);
2019
2020    bld.float_type = lp_type_float(32);
2021    bld.int_type = lp_type_int(32);
2022    bld.coord_type = type;
2023    bld.uint_coord_type = lp_uint_type(type);
2024    bld.int_coord_type = lp_int_type(type);
2025    bld.texel_type = type;
2026
2027    lp_build_context_init(&bld.float_bld, builder, bld.float_type);
2028    lp_build_context_init(&bld.int_bld, builder, bld.int_type);
2029    lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
2030    lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
2031    lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
2032    lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
2033
2034    /* Get the dynamic state */
2035    width = dynamic_state->width(dynamic_state, builder, unit);
2036    height = dynamic_state->height(dynamic_state, builder, unit);
2037    depth = dynamic_state->depth(dynamic_state, builder, unit);
2038    row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
2039    img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit);
2040    data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
2041    /* Note that data_array is an array[level] of pointers to texture images */
2042
2043    s = coords[0];
2044    t = coords[1];
2045    r = coords[2];
2046
2047    width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
2048    height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
2049    depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
2050
2051    if (util_format_is_rgba8_variant(bld.format_desc) &&
2052        static_state->target == PIPE_TEXTURE_2D &&
2053        static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
2054        static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
2055        static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
2056        is_simple_wrap_mode(static_state->wrap_s) &&
2057        is_simple_wrap_mode(static_state->wrap_t)) {
2058       /* special case */
2059       lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
2060                                     row_stride_array, data_array, texel);
2061    }
2062    else {
2063       lp_build_sample_general(&bld, unit, s, t, r,
2064                               width, height, depth,
2065                               width_vec, height_vec, depth_vec,
2066                               row_stride_array, img_stride_array,
2067                               data_array,
2068                               texel);
2069    }
2070
2071    lp_build_sample_compare(&bld, r, texel);
2072 }