src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * Texture sampling -- SoA.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  */
  34
  35 #include "pipe/p_defines.h"
  36 #include "pipe/p_state.h"
  37 #include "util/u_debug.h"
  38 #include "util/u_dump.h"
  39 #include "util/u_memory.h"
  40 #include "util/u_math.h"
  41 #include "util/u_format.h"
  42 #include "util/u_cpu_detect.h"
  43 #include "lp_bld_debug.h"
  44 #include "lp_bld_type.h"
  45 #include "lp_bld_const.h"
  46 #include "lp_bld_conv.h"
  47 #include "lp_bld_arit.h"
  48 #include "lp_bld_logic.h"
  49 #include "lp_bld_swizzle.h"
  50 #include "lp_bld_pack.h"
  51 #include "lp_bld_flow.h"
  52 #include "lp_bld_format.h"
  53 #include "lp_bld_sample.h"
  54
  55
  56 /**
  57  * Keep all information for sampling code generation in a single place.
      *
      * Each lp_type / lp_build_context pair below describes one value type
      * and the matching build context handed to the lp_build_*() helpers.
  58  */
  59 struct lp_build_sample_context
  60 {
        /** LLVM builder handed to the LLVMBuild*() calls in this file */
  61    LLVMBuilderRef builder;
  62
        /** Static sampler/texture state; this file reads target, wrap_s/t/r,
         *  swizzle_r/g/b/a, border_color and normalized_coords from it */
  63    const struct lp_sampler_static_state *static_state;
  64
        /** NOTE(review): presumably the source of run-time texture parameters
         *  (size, strides, data pointers) -- confirm against the callers */
  65    struct lp_sampler_dynamic_state *dynamic_state;
  66
        /** Texture format description; block.width/height/bits are read */
  67    const struct util_format_description *format_desc;
  68
  69    /** regular scalar float type */
  70    struct lp_type float_type;
  71    struct lp_build_context float_bld;
  72
  73    /** regular scalar int type */
  74    struct lp_type int_type;
  75    struct lp_build_context int_bld;
  76
  77    /** Incoming coordinates type and build context */
  78    struct lp_type coord_type;
  79    struct lp_build_context coord_bld;
  80
  81    /** Unsigned integer coordinates */
  82    struct lp_type uint_coord_type;
  83    struct lp_build_context uint_coord_bld;
  84
  85    /** Signed integer coordinates */
  86    struct lp_type int_coord_type;
  87    struct lp_build_context int_coord_bld;
  88
  89    /** Output texels type and build context */
  90    struct lp_type texel_type;
  91    struct lp_build_context texel_bld;
  92 };
  93
  94
  95 /**
  96  * Does the given texture wrap mode allow sampling the texture border color?
  97  * XXX maybe move this into gallium util code.
  98  */
  99 static boolean
 100 wrap_mode_uses_border_color(unsigned mode)
 101 {
 102    switch (mode) {
 103    case PIPE_TEX_WRAP_REPEAT:
 104    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 105    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 106    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 107       return FALSE;
 108    case PIPE_TEX_WRAP_CLAMP:
 109    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 110    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 111    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 112       return TRUE;
 113    default:
 114       assert(0 && "unexpected wrap mode");
 115       return FALSE;
 116    }
 117 }
 118
 119
 120 static LLVMValueRef
 121 lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
 122                           LLVMValueRef data_array, LLVMValueRef level)
 123 {
 124    LLVMValueRef indexes[2], data_ptr;
 125    indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
 126    indexes[1] = level;
 127    data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
 128    data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
 129    return data_ptr;
 130 }
 131
 132
 133 static LLVMValueRef
 134 lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
 135                                 LLVMValueRef data_array, int level)
 136 {
 137    LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
 138    return lp_build_get_mipmap_level(bld, data_array, lvl);
 139 }
 140
 141
 142 /**
 143  * Dereference stride_array[mipmap_level] array to get a stride.
 144  * Return stride as a vector.
 145  */
 146 static LLVMValueRef
 147 lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
 148                               LLVMValueRef stride_array, LLVMValueRef level)
 149 {
 150    LLVMValueRef indexes[2], stride;
 151    indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
 152    indexes[1] = level;
 153    stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, "");
 154    stride = LLVMBuildLoad(bld->builder, stride, "");
 155    stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
 156    return stride;
 157 }
 158
 159
 160 /** Dereference stride_array[0] array to get a stride (as vector). */
 161 static LLVMValueRef
 162 lp_build_get_const_level_stride_vec(struct lp_build_sample_context *bld,
 163                                     LLVMValueRef stride_array, int level)
 164 {
 165    LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
 166    return lp_build_get_level_stride_vec(bld, stride_array, lvl);
 167 }
 168
 169
 170 static int
 171 texture_dims(enum pipe_texture_target tex)
 172 {
 173    switch (tex) {
 174    case PIPE_TEXTURE_1D:
 175       return 1;
 176    case PIPE_TEXTURE_2D:
 177    case PIPE_TEXTURE_CUBE:
 178       return 2;
 179    case PIPE_TEXTURE_3D:
 180       return 3;
 181    default:
 182       assert(0 && "bad texture target in texture_dims()");
 183       return 2;
 184    }
 185 }
 186
 187
 188 static LLVMValueRef
 189 lp_build_swizzle_chan_soa(struct lp_type type,
 190                           const LLVMValueRef *unswizzled,
 191                           enum util_format_swizzle swizzle)
 192 {
 193    switch (swizzle) {
 194    case PIPE_SWIZZLE_RED:
 195    case PIPE_SWIZZLE_GREEN:
 196    case PIPE_SWIZZLE_BLUE:
 197    case PIPE_SWIZZLE_ALPHA:
 198       return unswizzled[swizzle];
 199    case PIPE_SWIZZLE_ZERO:
 200       return lp_build_zero(type);
 201    case PIPE_SWIZZLE_ONE:
 202       return lp_build_one(type);
 203    default:
 204       assert(0);
 205       return lp_build_undef(type);
 206    }
 207 }
 208
 209
 210 static void
 211 lp_build_swizzle_soa(struct lp_build_sample_context *bld,
 212                      LLVMValueRef *texel)
 213 {
 214    LLVMValueRef unswizzled[4];
 215    unsigned char swizzles[4];
 216    unsigned chan;
 217
 218    for (chan = 0; chan < 4; ++chan) {
 219       unswizzled[chan] = texel[chan];
 220    }
 221
 222    swizzles[0] = bld->static_state->swizzle_r;
 223    swizzles[1] = bld->static_state->swizzle_g;
 224    swizzles[2] = bld->static_state->swizzle_b;
 225    swizzles[3] = bld->static_state->swizzle_a;
 226
 227    for (chan = 0; chan < 4; ++chan) {
 228       unsigned swizzle = swizzles[chan];
 229       texel[chan] = lp_build_swizzle_chan_soa(bld->texel_type,
 230                                               unswizzled, swizzle);
 231    }
 232 }
 233
 234
 235
 236 /**
 237  * Generate code to fetch a texel from a texture at int coords (x, y, z).
 238  * The computation depends on whether the texture is 1D, 2D or 3D.
 239  * The result, texel, will be:
 240  *   texel[0] = red values
 241  *   texel[1] = green values
 242  *   texel[2] = blue values
 243  *   texel[3] = alpha values
 244  */
 245 static void
 246 lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
 247                           LLVMValueRef width,
 248                           LLVMValueRef height,
 249                           LLVMValueRef depth,
 250                           LLVMValueRef x,
 251                           LLVMValueRef y,
 252                           LLVMValueRef z,
 253                           LLVMValueRef y_stride,
 254                           LLVMValueRef z_stride,
 255                           LLVMValueRef data_ptr,
 256                           LLVMValueRef *texel)
 257 {
 258    const int dims = texture_dims(bld->static_state->target);
 259    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 260    LLVMValueRef offset;
 261    LLVMValueRef i, j;
 262    LLVMValueRef use_border = NULL;
 263
 264    /* use_border = x < 0 || x >= width || y < 0 || y >= height */
 265    if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) {
 266       LLVMValueRef b1, b2;
 267       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
 268       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
 269       use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
 270    }
 271
 272    if (dims >= 2 && wrap_mode_uses_border_color(bld->static_state->wrap_t)) {
 273       LLVMValueRef b1, b2;
 274       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
 275       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
 276       if (use_border) {
 277          use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
 278          use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
 279       }
 280       else {
 281          use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
 282       }
 283    }
 284
 285    if (dims == 3 && wrap_mode_uses_border_color(bld->static_state->wrap_r)) {
 286       LLVMValueRef b1, b2;
 287       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
 288       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
 289       if (use_border) {
 290          use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
 291          use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
 292       }
 293       else {
 294          use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
 295       }
 296    }
 297
 298    /*
 299     * Describe the coordinates in terms of pixel blocks.
 300     *
 301     * TODO: pixel blocks are power of two. LLVM should convert rem/div to
 302     * bit arithmetic. Verify this.
 303     */
 304
 305    if (bld->format_desc->block.width == 1) {
 306       i = bld->uint_coord_bld.zero;
 307    }
 308    else {
 309       LLVMValueRef block_width = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.width);
 310       i = LLVMBuildURem(bld->builder, x, block_width, "");
 311       x = LLVMBuildUDiv(bld->builder, x, block_width, "");
 312    }
 313
 314    if (bld->format_desc->block.height == 1) {
 315       j = bld->uint_coord_bld.zero;
 316    }
 317    else {
 318       LLVMValueRef block_height = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.height);
 319       j = LLVMBuildURem(bld->builder, y, block_height, "");
 320       y = LLVMBuildUDiv(bld->builder, y, block_height, "");
 321    }
 322
 323    /* convert x,y,z coords to linear offset from start of texture, in bytes */
 324    offset = lp_build_sample_offset(&bld->uint_coord_bld,
 325                                    bld->format_desc,
 326                                    x, y, z, y_stride, z_stride);
 327
 328    if (use_border) {
 329       /* If we can sample the border color, it means that texcoords may
 330        * lie outside the bounds of the texture image.  We need to do
 331        * something to prevent reading out of bounds and causing a segfault.
 332        *
 333        * Simply AND the texture coords with !use_border.  This will cause
 334        * coords which are out of bounds to become zero.  Zero's guaranteed
 335        * to be inside the texture image.
 336        */
 337       offset = lp_build_andc(&bld->uint_coord_bld, offset, use_border);
 338    }
 339
 340    lp_build_fetch_rgba_soa(bld->builder,
 341                            bld->format_desc,
 342                            bld->texel_type,
 343                            data_ptr, offset,
 344                            i, j,
 345                            texel);
 346
 347    lp_build_swizzle_soa(bld, texel);
 348
 349    /*
 350     * Note: if we find an app which frequently samples the texture border
 351     * we might want to implement a true conditional here to avoid sampling
 352     * the texture whenever possible (since that's quite a bit of code).
 353     * Ex:
 354     *   if (use_border) {
 355     *      texel = border_color;
 356     *   }
 357     *   else {
 358     *      texel = sample_texture(coord);
 359     *   }
 360     * As it is now, we always sample the texture, then selectively replace
 361     * the texel color results with the border color.
 362     */
 363
 364    if (use_border) {
 365       /* select texel color or border color depending on use_border */
 366       int chan;
 367       for (chan = 0; chan < 4; chan++) {
 368          LLVMValueRef border_chan =
 369             lp_build_const_vec(bld->texel_type,
 370                                   bld->static_state->border_color[chan]);
 371          texel[chan] = lp_build_select(&bld->texel_bld, use_border,
 372                                        border_chan, texel[chan]);
 373       }
 374    }
 375 }
 376
 377
 378 static LLVMValueRef
 379 lp_build_sample_packed(struct lp_build_sample_context *bld,
 380                        LLVMValueRef x,
 381                        LLVMValueRef y,
 382                        LLVMValueRef y_stride,
 383                        LLVMValueRef data_array)
 384 {
 385    LLVMValueRef offset;
 386    LLVMValueRef data_ptr;
 387
 388    offset = lp_build_sample_offset(&bld->uint_coord_bld,
 389                                    bld->format_desc,
 390                                    x, y, NULL, y_stride, NULL);
 391
 392    assert(bld->format_desc->block.width == 1);
 393    assert(bld->format_desc->block.height == 1);
 394    assert(bld->format_desc->block.bits <= bld->texel_type.width);
 395
 396    /* get pointer to mipmap level 0 data */
 397    data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
 398
 399    return lp_build_gather(bld->builder,
 400                           bld->texel_type.length,
 401                           bld->format_desc->block.bits,
 402                           bld->texel_type.width,
 403                           data_ptr, offset);
 404 }
 405
 406
 407 /**
 408  * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
 409  */
 410 static LLVMValueRef
 411 lp_build_coord_mirror(struct lp_build_sample_context *bld,
 412                       LLVMValueRef coord)
 413 {
 414    struct lp_build_context *coord_bld = &bld->coord_bld;
 415    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 416    LLVMValueRef fract, flr, isOdd;
 417
 418    /* fract = coord - floor(coord) */
 419    fract = lp_build_sub(coord_bld, coord, lp_build_floor(coord_bld, coord));
 420
 421    /* flr = ifloor(coord); */
 422    flr = lp_build_ifloor(coord_bld, coord);
 423
 424    /* isOdd = flr & 1 */
 425    isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, "");
 426
 427    /* make coord positive or negative depending on isOdd */
 428    coord = lp_build_set_sign(coord_bld, fract, isOdd);
 429
 430    /* convert isOdd to float */
 431    isOdd = lp_build_int_to_float(coord_bld, isOdd);
 432
 433    /* add isOdd to coord */
 434    coord = lp_build_add(coord_bld, coord, isOdd);
 435
 436    return coord;
 437 }
 438
 439
 440 /**
 441  * We only support a few wrap modes in lp_build_sample_wrap_int() at this time.
 442  * Return whether the given mode is supported by that function.
 443  */
 444 static boolean
 445 is_simple_wrap_mode(unsigned mode)
 446 {
 447    switch (mode) {
 448    case PIPE_TEX_WRAP_REPEAT:
 449    case PIPE_TEX_WRAP_CLAMP:
 450    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 451       return TRUE;
 452    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 453    default:
 454       return FALSE;
 455    }
 456 }
 457
 458
 459 /**
 460  * Build LLVM code for texture wrap mode, for scaled integer texcoords.
 461  * \param coord  the incoming texcoord (s,t,r or q) scaled to the texture size
 462  * \param length  the texture size along one dimension
 463  * \param is_pot  if TRUE, length is a power of two
 464  * \param wrap_mode  one of PIPE_TEX_WRAP_x
 465  */
 466 static LLVMValueRef
 467 lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
 468                          LLVMValueRef coord,
 469                          LLVMValueRef length,
 470                          boolean is_pot,
 471                          unsigned wrap_mode)
 472 {
 473    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 474    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 475    LLVMValueRef length_minus_one;
 476
 477    length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
 478
 479    switch(wrap_mode) {
 480    case PIPE_TEX_WRAP_REPEAT:
 481       if(is_pot)
 482          coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
 483       else
 484          /* Signed remainder won't give the right results for negative
 485           * dividends but unsigned remainder does.*/
 486          coord = LLVMBuildURem(bld->builder, coord, length, "");
 487       break;
 488
 489    case PIPE_TEX_WRAP_CLAMP:
 490    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 491    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 492       coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
 493       coord = lp_build_min(int_coord_bld, coord, length_minus_one);
 494       break;
 495
 496    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 497    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 498    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 499    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 500       /* FIXME */
 501       _debug_printf("llvmpipe: failed to translate texture wrap mode %s\n",
 502                     util_dump_tex_wrap(wrap_mode, TRUE));
 503       coord = lp_build_max(uint_coord_bld, coord, uint_coord_bld->zero);
 504       coord = lp_build_min(uint_coord_bld, coord, length_minus_one);
 505       break;
 506
 507    default:
 508       assert(0);
 509    }
 510
 511    return coord;
 512 }
 513
 514
 515 /**
 516  * Build LLVM code for texture wrap mode for linear filtering.
 517  * \param x0_out  returns first integer texcoord
 518  * \param x1_out  returns second integer texcoord
 519  * \param weight_out  returns linear interpolation weight
 520  */
 521 static void
 522 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
 523                             LLVMValueRef coord,
 524                             LLVMValueRef length,
 525                             boolean is_pot,
 526                             unsigned wrap_mode,
 527                             LLVMValueRef *x0_out,
 528                             LLVMValueRef *x1_out,
 529                             LLVMValueRef *weight_out)
 530 {
 531    struct lp_build_context *coord_bld = &bld->coord_bld;
 532    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 533    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 534    LLVMValueRef two = lp_build_const_vec(coord_bld->type, 2.0);
 535    LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
 536    LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
 537    LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
 538    LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
 539    LLVMValueRef coord0, coord1, weight;
 540
 541    switch(wrap_mode) {
 542    case PIPE_TEX_WRAP_REPEAT:
 543       /* mul by size and subtract 0.5 */
 544       coord = lp_build_mul(coord_bld, coord, length_f);
 545       coord = lp_build_sub(coord_bld, coord, half);
 546       /* convert to int */
 547       coord0 = lp_build_ifloor(coord_bld, coord);
 548       coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one);
 549       /* compute lerp weight */
 550       weight = lp_build_fract(coord_bld, coord);
 551       /* repeat wrap */
 552       if (is_pot) {
 553          coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
 554          coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, "");
 555       }
 556       else {
 557          /* Signed remainder won't give the right results for negative
 558           * dividends but unsigned remainder does.*/
 559          coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
 560          coord1 = LLVMBuildURem(bld->builder, coord1, length, "");
 561       }
 562       break;
 563
 564    case PIPE_TEX_WRAP_CLAMP:
 565       if (bld->static_state->normalized_coords) {
 566          coord = lp_build_mul(coord_bld, coord, length_f);
 567       }
 568       weight = lp_build_fract(coord_bld, coord);
 569       coord0 = lp_build_clamp(coord_bld, coord, coord_bld->zero,
 570                               length_f_minus_one);
 571       coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
 572       coord1 = lp_build_clamp(coord_bld, coord1, coord_bld->zero,
 573                               length_f_minus_one);
 574       coord0 = lp_build_ifloor(coord_bld, coord0);
 575       coord1 = lp_build_ifloor(coord_bld, coord1);
 576       break;
 577
 578    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 579       if (bld->static_state->normalized_coords) {
 580          /* clamp to [0,1] */
 581          coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one);
 582          /* mul by tex size and subtract 0.5 */
 583          coord = lp_build_mul(coord_bld, coord, length_f);
 584          coord = lp_build_sub(coord_bld, coord, half);
 585       }
 586       else {
 587          LLVMValueRef min, max;
 588          /* clamp to [0.5, length - 0.5] */
 589          min = lp_build_const_vec(coord_bld->type, 0.5F);
 590          max = lp_build_sub(coord_bld, length_f, min);
 591          coord = lp_build_clamp(coord_bld, coord, min, max);
 592       }
 593       /* compute lerp weight */
 594       weight = lp_build_fract(coord_bld, coord);
 595       /* coord0 = floor(coord); */
 596       coord0 = lp_build_ifloor(coord_bld, coord);
 597       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 598       /* coord0 = max(coord0, 0) */
 599       coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
 600       /* coord1 = min(coord1, length-1) */
 601       coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
 602       break;
 603
 604    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 605       {
 606          LLVMValueRef min, max;
 607          if (bld->static_state->normalized_coords) {
 608             /* min = -1.0 / (2 * length) = -0.5 / length */
 609             min = lp_build_mul(coord_bld,
 610                                lp_build_const_vec(coord_bld->type, -0.5F),
 611                                lp_build_rcp(coord_bld, length_f));
 612             /* max = 1.0 - min */
 613             max = lp_build_sub(coord_bld, coord_bld->one, min);
 614             /* coord = clamp(coord, min, max) */
 615             coord = lp_build_clamp(coord_bld, coord, min, max);
 616             /* scale coord to length (and sub 0.5?) */
 617             coord = lp_build_mul(coord_bld, coord, length_f);
 618             coord = lp_build_sub(coord_bld, coord, half);
 619          }
 620          else {
 621             /* clamp to [-0.5, length + 0.5] */
 622             min = lp_build_const_vec(coord_bld->type, -0.5F);
 623             max = lp_build_sub(coord_bld, length_f, min);
 624             coord = lp_build_clamp(coord_bld, coord, min, max);
 625             coord = lp_build_sub(coord_bld, coord, half);
 626          }
 627          /* compute lerp weight */
 628          weight = lp_build_fract(coord_bld, coord);
 629          /* convert to int */
 630          coord0 = lp_build_ifloor(coord_bld, coord);
 631          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 632       }
 633       break;
 634
 635    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 636       /* compute mirror function */
 637       coord = lp_build_coord_mirror(bld, coord);
 638
 639       /* scale coord to length */
 640       coord = lp_build_mul(coord_bld, coord, length_f);
 641       coord = lp_build_sub(coord_bld, coord, half);
 642
 643       /* compute lerp weight */
 644       weight = lp_build_fract(coord_bld, coord);
 645
 646       /* convert to int coords */
 647       coord0 = lp_build_ifloor(coord_bld, coord);
 648       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 649
 650       /* coord0 = max(coord0, 0) */
 651       coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
 652       /* coord1 = min(coord1, length-1) */
 653       coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
 654       break;
 655
 656    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 657       {
 658          LLVMValueRef min, max;
 659          /* min = 1.0 / (2 * length) */
 660          min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
 661          /* max = 1.0 - min */
 662          max = lp_build_sub(coord_bld, coord_bld->one, min);
 663
 664          coord = lp_build_abs(coord_bld, coord);
 665          coord = lp_build_clamp(coord_bld, coord, min, max);
 666          coord = lp_build_mul(coord_bld, coord, length_f);
 667          if(0)coord = lp_build_sub(coord_bld, coord, half);
 668          weight = lp_build_fract(coord_bld, coord);
 669          coord0 = lp_build_ifloor(coord_bld, coord);
 670          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 671       }
 672       break;
 673
 674    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 675       {
 676          LLVMValueRef min, max;
 677          /* min = 1.0 / (2 * length) */
 678          min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
 679          /* max = 1.0 - min */
 680          max = lp_build_sub(coord_bld, coord_bld->one, min);
 681
 682          coord = lp_build_abs(coord_bld, coord);
 683          coord = lp_build_clamp(coord_bld, coord, min, max);
 684          coord = lp_build_mul(coord_bld, coord, length_f);
 685          coord = lp_build_sub(coord_bld, coord, half);
 686          weight = lp_build_fract(coord_bld, coord);
 687          coord0 = lp_build_ifloor(coord_bld, coord);
 688          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 689       }
 690       break;
 691
 692    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 693       {
 694          LLVMValueRef min, max;
 695          /* min = -1.0 / (2 * length) = -0.5 / length */
 696          min = lp_build_mul(coord_bld,
 697                             lp_build_const_vec(coord_bld->type, -0.5F),
 698                             lp_build_rcp(coord_bld, length_f));
 699          /* max = 1.0 - min */
 700          max = lp_build_sub(coord_bld, coord_bld->one, min);
 701
 702          coord = lp_build_abs(coord_bld, coord);
 703          coord = lp_build_clamp(coord_bld, coord, min, max);
 704          coord = lp_build_mul(coord_bld, coord, length_f);
 705          coord = lp_build_sub(coord_bld, coord, half);
 706          weight = lp_build_fract(coord_bld, coord);
 707          coord0 = lp_build_ifloor(coord_bld, coord);
 708          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 709       }
 710       break;
 711
 712    default:
 713       assert(0);
 714       coord0 = NULL;
 715       coord1 = NULL;
 716       weight = NULL;
 717    }
 718
 719    *x0_out = coord0;
 720    *x1_out = coord1;
 721    *weight_out = weight;
 722 }
 723
 724
 725 /**
 726  * Build LLVM code for texture wrap mode for nearest filtering.
 727  * \param coord  the incoming texcoord (nominally in [0,1])
 728  * \param length  the texture size along one dimension, as int
 729  * \param is_pot  if TRUE, length is a power of two
 730  * \param wrap_mode  one of PIPE_TEX_WRAP_x
 731  */
 732 static LLVMValueRef
 733 lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
 734                              LLVMValueRef coord,
 735                              LLVMValueRef length,
 736                              boolean is_pot,
 737                              unsigned wrap_mode)
 738 {
 739    struct lp_build_context *coord_bld = &bld->coord_bld;
 740    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 741    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 742    LLVMValueRef two = lp_build_const_vec(coord_bld->type, 2.0);
 743    LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
 744    LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
 745    LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
 746    LLVMValueRef icoord;
 747
 748    switch(wrap_mode) {
 749    case PIPE_TEX_WRAP_REPEAT:
 750       coord = lp_build_mul(coord_bld, coord, length_f);
 751       icoord = lp_build_ifloor(coord_bld, coord);
 752       if (is_pot)
 753          icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, "");
 754       else
 755          /* Signed remainder won't give the right results for negative
 756           * dividends but unsigned remainder does.*/
 757          icoord = LLVMBuildURem(bld->builder, icoord, length, "");
 758       break;
 759
 760    case PIPE_TEX_WRAP_CLAMP:
 761       /* mul by size */
 762       if (bld->static_state->normalized_coords) {
 763          coord = lp_build_mul(coord_bld, coord, length_f);
 764       }
 765       /* floor */
 766       icoord = lp_build_ifloor(coord_bld, coord);
 767       /* clamp to [0, size-1].  Note: int coord builder type */
 768       icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
 769                               length_minus_one);
 770       break;
 771
 772    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 773       {
 774          LLVMValueRef min, max;
 775          if (bld->static_state->normalized_coords) {
 776             /* min = 1.0 / (2 * length) */
 777             min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
 778             /* max = length - min */
 779             max = lp_build_sub(coord_bld, length_f, min);
 780             /* scale coord to length */
 781             coord = lp_build_mul(coord_bld, coord, length_f);
 782          }
 783          else {
 784             /* clamp to [0.5, length - 0.5] */
 785             min = lp_build_const_vec(coord_bld->type, 0.5F);
 786             max = lp_build_sub(coord_bld, length_f, min);
 787          }
 788          /* coord = clamp(coord, min, max) */
 789          coord = lp_build_clamp(coord_bld, coord, min, max);
 790          icoord = lp_build_ifloor(coord_bld, coord);
 791       }
 792       break;
 793
 794    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 795       /* Note: this is the same as CLAMP_TO_EDGE, except min = -min */
 796       {
 797          LLVMValueRef min, max;
 798          if (bld->static_state->normalized_coords) {
 799             /* min = -1.0 / (2 * length) = -0.5 / length */
 800             min = lp_build_mul(coord_bld,
 801                                lp_build_const_vec(coord_bld->type, -0.5F),
 802                                lp_build_rcp(coord_bld, length_f));
 803             /* max = length - min */
 804             max = lp_build_sub(coord_bld, length_f, min);
 805             /* scale coord to length */
 806             coord = lp_build_mul(coord_bld, coord, length_f);
 807          }
 808          else {
 809             /* clamp to [-0.5, length + 0.5] */
 810             min = lp_build_const_vec(coord_bld->type, -0.5F);
 811             max = lp_build_sub(coord_bld, length_f, min);
 812          }
 813          /* coord = clamp(coord, min, max) */
 814          coord = lp_build_clamp(coord_bld, coord, min, max);
 815          icoord = lp_build_ifloor(coord_bld, coord);
 816       }
 817       break;
 818
 819    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 820       {
 821          LLVMValueRef min, max;
 822          /* min = 1.0 / (2 * length) */
 823          min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
 824          /* max = length - min */
 825          max = lp_build_sub(coord_bld, length_f, min);
 826
 827          /* compute mirror function */
 828          coord = lp_build_coord_mirror(bld, coord);
 829
 830          /* scale coord to length */
 831          coord = lp_build_mul(coord_bld, coord, length_f);
 832
 833          /* coord = clamp(coord, min, max) */
 834          coord = lp_build_clamp(coord_bld, coord, min, max);
 835          icoord = lp_build_ifloor(coord_bld, coord);
 836       }
 837       break;
 838
 839    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 840       coord = lp_build_abs(coord_bld, coord);
 841       coord = lp_build_mul(coord_bld, coord, length_f);
 842       coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f_minus_one);
 843       icoord = lp_build_ifloor(coord_bld, coord);
 844       break;
 845
 846    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 847       {
 848          LLVMValueRef min, max;
 849          /* min = 1.0 / (2 * length) */
 850          min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
 851          /* max = length - min */
 852          max = lp_build_sub(coord_bld, length_f, min);
 853
 854          coord = lp_build_abs(coord_bld, coord);
 855          coord = lp_build_mul(coord_bld, coord, length_f);
 856          coord = lp_build_clamp(coord_bld, coord, min, max);
 857          icoord = lp_build_ifloor(coord_bld, coord);
 858       }
 859       break;
 860
 861    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 862       {
 863          LLVMValueRef min, max;
 864          /* min = 1.0 / (2 * length) */
 865          min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
 866          min = lp_build_negate(coord_bld, min);
 867          /* max = length - min */
 868          max = lp_build_sub(coord_bld, length_f, min);
 869
 870          coord = lp_build_abs(coord_bld, coord);
 871          coord = lp_build_mul(coord_bld, coord, length_f);
 872          coord = lp_build_clamp(coord_bld, coord, min, max);
 873          icoord = lp_build_ifloor(coord_bld, coord);
 874       }
 875       break;
 876
 877    default:
 878       assert(0);
 879       icoord = NULL;
 880    }
 881
 882    return icoord;
 883 }
 884
 885
 886 /**
 887  * Codegen equivalent for u_minify().
 888  * Return max(1, base_size >> level);
 889  */
 890 static LLVMValueRef
 891 lp_build_minify(struct lp_build_sample_context *bld,
 892                 LLVMValueRef base_size,
 893                 LLVMValueRef level)
 894 {
 895    LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify");
 896    size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
 897    return size;
 898 }
 899
 900
 901 /**
 902  * Generate code to compute texture level of detail (lambda).
 903  * \param s  vector of texcoord s values
 904  * \param t  vector of texcoord t values
 905  * \param r  vector of texcoord r values
 906  * \param shader_lod_bias  vector float with the shader lod bias,
 907  * \param width  scalar int texture width
 908  * \param height  scalar int texture height
 909  * \param depth  scalar int texture depth
 910  */
 911 static LLVMValueRef
 912 lp_build_lod_selector(struct lp_build_sample_context *bld,
 913                       LLVMValueRef s,
 914                       LLVMValueRef t,
 915                       LLVMValueRef r,
 916                       const LLVMValueRef *ddx,
 917                       const LLVMValueRef *ddy,
 918                       LLVMValueRef shader_lod_bias,
 919                       LLVMValueRef width,
 920                       LLVMValueRef height,
 921                       LLVMValueRef depth)
 922
 923 {
 924    if (bld->static_state->min_lod == bld->static_state->max_lod) {
 925       /* User is forcing sampling from a particular mipmap level.
 926        * This is hit during mipmap generation.
 927        */
 928       return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
 929    }
 930    else {
 931       const int dims = texture_dims(bld->static_state->target);
 932       struct lp_build_context *float_bld = &bld->float_bld;
 933       LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(),
 934                                                     bld->static_state->lod_bias);
 935       LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(),
 936                                            bld->static_state->min_lod);
 937       LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
 938                                            bld->static_state->max_lod);
 939
 940       LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
 941       LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
 942       LLVMValueRef rho, lod;
 943
 944       /*
 945        * dsdx = abs(s[1] - s[0]);
 946        * dsdy = abs(s[2] - s[0]);
 947        * dtdx = abs(t[1] - t[0]);
 948        * dtdy = abs(t[2] - t[0]);
 949        * drdx = abs(r[1] - r[0]);
 950        * drdy = abs(r[2] - r[0]);
 951        */
 952       dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
 953       dsdx = lp_build_abs(float_bld, dsdx);
 954       dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
 955       dsdy = lp_build_abs(float_bld, dsdy);
 956       if (dims > 1) {
 957          dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
 958          dtdx = lp_build_abs(float_bld, dtdx);
 959          dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
 960          dtdy = lp_build_abs(float_bld, dtdy);
 961          if (dims > 2) {
 962             drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
 963             drdx = lp_build_abs(float_bld, drdx);
 964             drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
 965             drdy = lp_build_abs(float_bld, drdy);
 966          }
 967       }
 968
 969       /* Compute rho = max of all partial derivatives scaled by texture size.
 970        * XXX this could be vectorized somewhat
 971        */
 972       rho = LLVMBuildMul(bld->builder,
 973                          lp_build_max(float_bld, dsdx, dsdy),
 974                          lp_build_int_to_float(float_bld, width), "");
 975       if (dims > 1) {
 976          LLVMValueRef max;
 977          max = LLVMBuildMul(bld->builder,
 978                             lp_build_max(float_bld, dtdx, dtdy),
 979                             lp_build_int_to_float(float_bld, height), "");
 980          rho = lp_build_max(float_bld, rho, max);
 981          if (dims > 2) {
 982             max = LLVMBuildMul(bld->builder,
 983                                lp_build_max(float_bld, drdx, drdy),
 984                                lp_build_int_to_float(float_bld, depth), "");
 985             rho = lp_build_max(float_bld, rho, max);
 986          }
 987       }
 988
 989       /* compute lod = log2(rho) */
 990       lod = lp_build_log2(float_bld, rho);
 991
 992       /* add sampler lod bias */
 993       lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler LOD bias");
 994
 995       /* add shader lod bias */
 996       /* XXX for now we take only the first element since our lod is scalar */
 997       shader_lod_bias = LLVMBuildExtractElement(bld->builder, shader_lod_bias,
 998                                                 LLVMConstInt(LLVMInt32Type(), 0, 0), "");
 999       lod = LLVMBuildAdd(bld->builder, lod, shader_lod_bias, "shader LOD bias");
1000
1001       /* clamp lod */
1002       lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
1003
1004       return lod;
1005    }
1006 }
1007
1008
1009 /**
1010  * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
1011  * mipmap level index.
1012  * Note: this is all scalar code.
1013  * \param lod  scalar float texture level of detail
1014  * \param level_out  returns integer
1015  */
1016 static void
1017 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
1018                            unsigned unit,
1019                            LLVMValueRef lod,
1020                            LLVMValueRef *level_out)
1021 {
1022    struct lp_build_context *float_bld = &bld->float_bld;
1023    struct lp_build_context *int_bld = &bld->int_bld;
1024    LLVMValueRef last_level, level;
1025
1026    LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
1027
1028    last_level = bld->dynamic_state->last_level(bld->dynamic_state,
1029                                                bld->builder, unit);
1030
1031    /* convert float lod to integer */
1032    level = lp_build_iround(float_bld, lod);
1033
1034    /* clamp level to legal range of levels */
1035    *level_out = lp_build_clamp(int_bld, level, zero, last_level);
1036 }
1037
1038
1039 /**
1040  * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
1041  * two (adjacent) mipmap level indexes.  Later, we'll sample from those
1042  * two mipmap levels and interpolate between them.
1043  */
1044 static void
1045 lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
1046                            unsigned unit,
1047                            LLVMValueRef lod,
1048                            LLVMValueRef *level0_out,
1049                            LLVMValueRef *level1_out,
1050                            LLVMValueRef *weight_out)
1051 {
1052    struct lp_build_context *float_bld = &bld->float_bld;
1053    struct lp_build_context *int_bld = &bld->int_bld;
1054    LLVMValueRef last_level, level;
1055
1056    last_level = bld->dynamic_state->last_level(bld->dynamic_state,
1057                                                bld->builder, unit);
1058
1059    /* convert float lod to integer */
1060    level = lp_build_ifloor(float_bld, lod);
1061
1062    /* compute level 0 and clamp to legal range of levels */
1063    *level0_out = lp_build_clamp(int_bld, level,
1064                                 int_bld->zero,
1065                                 last_level);
1066    /* compute level 1 and clamp to legal range of levels */
1067    *level1_out = lp_build_add(int_bld, *level0_out, int_bld->one);
1068    *level1_out = lp_build_min(int_bld, *level1_out, last_level);
1069
1070    *weight_out = lp_build_fract(float_bld, lod);
1071 }
1072
1073
1074 /**
1075  * Generate code to sample a mipmap level with nearest filtering.
1076  * If sampling a cube texture, r = cube face in [0,5].
1077  */
1078 static void
1079 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
1080                               LLVMValueRef width_vec,
1081                               LLVMValueRef height_vec,
1082                               LLVMValueRef depth_vec,
1083                               LLVMValueRef row_stride_vec,
1084                               LLVMValueRef img_stride_vec,
1085                               LLVMValueRef data_ptr,
1086                               LLVMValueRef s,
1087                               LLVMValueRef t,
1088                               LLVMValueRef r,
1089                               LLVMValueRef colors_out[4])
1090 {
1091    const int dims = texture_dims(bld->static_state->target);
1092    LLVMValueRef x, y, z;
1093
1094    /*
1095     * Compute integer texcoords.
1096     */
1097    x = lp_build_sample_wrap_nearest(bld, s, width_vec,
1098                                     bld->static_state->pot_width,
1099                                     bld->static_state->wrap_s);
1100    lp_build_name(x, "tex.x.wrapped");
1101
1102    if (dims >= 2) {
1103       y = lp_build_sample_wrap_nearest(bld, t, height_vec,
1104                                        bld->static_state->pot_height,
1105                                        bld->static_state->wrap_t);
1106       lp_build_name(y, "tex.y.wrapped");
1107
1108       if (dims == 3) {
1109          z = lp_build_sample_wrap_nearest(bld, r, depth_vec,
1110                                           bld->static_state->pot_height,
1111                                           bld->static_state->wrap_r);
1112          lp_build_name(z, "tex.z.wrapped");
1113       }
1114       else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1115          z = r;
1116       }
1117       else {
1118          z = NULL;
1119       }
1120    }
1121    else {
1122       y = z = NULL;
1123    }
1124
1125    /*
1126     * Get texture colors.
1127     */
1128    lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1129                              x, y, z,
1130                              row_stride_vec, img_stride_vec,
1131                              data_ptr, colors_out);
1132 }
1133
1134
1135 /**
1136  * Generate code to sample a mipmap level with linear filtering.
1137  * If sampling a cube texture, r = cube face in [0,5].
1138  */
1139 static void
1140 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
1141                              LLVMValueRef width_vec,
1142                              LLVMValueRef height_vec,
1143                              LLVMValueRef depth_vec,
1144                              LLVMValueRef row_stride_vec,
1145                              LLVMValueRef img_stride_vec,
1146                              LLVMValueRef data_ptr,
1147                              LLVMValueRef s,
1148                              LLVMValueRef t,
1149                              LLVMValueRef r,
1150                              LLVMValueRef colors_out[4])
1151 {
1152    const int dims = texture_dims(bld->static_state->target);
1153    LLVMValueRef x0, y0, z0, x1, y1, z1;
1154    LLVMValueRef s_fpart, t_fpart, r_fpart;
1155    LLVMValueRef neighbors[2][2][4];
1156    int chan;
1157
1158    /*
1159     * Compute integer texcoords.
1160     */
1161    lp_build_sample_wrap_linear(bld, s, width_vec,
1162                                bld->static_state->pot_width,
1163                                bld->static_state->wrap_s,
1164                                &x0, &x1, &s_fpart);
1165    lp_build_name(x0, "tex.x0.wrapped");
1166    lp_build_name(x1, "tex.x1.wrapped");
1167
1168    if (dims >= 2) {
1169       lp_build_sample_wrap_linear(bld, t, height_vec,
1170                                   bld->static_state->pot_height,
1171                                   bld->static_state->wrap_t,
1172                                   &y0, &y1, &t_fpart);
1173       lp_build_name(y0, "tex.y0.wrapped");
1174       lp_build_name(y1, "tex.y1.wrapped");
1175
1176       if (dims == 3) {
1177          lp_build_sample_wrap_linear(bld, r, depth_vec,
1178                                      bld->static_state->pot_depth,
1179                                      bld->static_state->wrap_r,
1180                                      &z0, &z1, &r_fpart);
1181          lp_build_name(z0, "tex.z0.wrapped");
1182          lp_build_name(z1, "tex.z1.wrapped");
1183       }
1184       else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1185          z0 = z1 = r;  /* cube face */
1186          r_fpart = NULL;
1187       }
1188       else {
1189          z0 = z1 = NULL;
1190          r_fpart = NULL;
1191       }
1192    }
1193    else {
1194       y0 = y1 = t_fpart = NULL;
1195       z0 = z1 = r_fpart = NULL;
1196    }
1197
1198    /*
1199     * Get texture colors.
1200     */
1201    /* get x0/x1 texels */
1202    lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1203                              x0, y0, z0,
1204                              row_stride_vec, img_stride_vec,
1205                              data_ptr, neighbors[0][0]);
1206    lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1207                              x1, y0, z0,
1208                              row_stride_vec, img_stride_vec,
1209                              data_ptr, neighbors[0][1]);
1210
1211    if (dims == 1) {
1212       /* Interpolate two samples from 1D image to produce one color */
1213       for (chan = 0; chan < 4; chan++) {
1214          colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
1215                                           neighbors[0][0][chan],
1216                                           neighbors[0][1][chan]);
1217       }
1218    }
1219    else {
1220       /* 2D/3D texture */
1221       LLVMValueRef colors0[4];
1222
1223       /* get x0/x1 texels at y1 */
1224       lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1225                                 x0, y1, z0,
1226                                 row_stride_vec, img_stride_vec,
1227                                 data_ptr, neighbors[1][0]);
1228       lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1229                                 x1, y1, z0,
1230                                 row_stride_vec, img_stride_vec,
1231                                 data_ptr, neighbors[1][1]);
1232
1233       /* Bilinear interpolate the four samples from the 2D image / 3D slice */
1234       for (chan = 0; chan < 4; chan++) {
1235          colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
1236                                           s_fpart, t_fpart,
1237                                           neighbors[0][0][chan],
1238                                           neighbors[0][1][chan],
1239                                           neighbors[1][0][chan],
1240                                           neighbors[1][1][chan]);
1241       }
1242
1243       if (dims == 3) {
1244          LLVMValueRef neighbors1[2][2][4];
1245          LLVMValueRef colors1[4];
1246
1247          /* get x0/x1/y0/y1 texels at z1 */
1248          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1249                                    x0, y0, z1,
1250                                    row_stride_vec, img_stride_vec,
1251                                    data_ptr, neighbors1[0][0]);
1252          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1253                                    x1, y0, z1,
1254                                    row_stride_vec, img_stride_vec,
1255                                    data_ptr, neighbors1[0][1]);
1256          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1257                                    x0, y1, z1,
1258                                    row_stride_vec, img_stride_vec,
1259                                    data_ptr, neighbors1[1][0]);
1260          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1261                                    x1, y1, z1,
1262                                    row_stride_vec, img_stride_vec,
1263                                    data_ptr, neighbors1[1][1]);
1264
1265          /* Bilinear interpolate the four samples from the second Z slice */
1266          for (chan = 0; chan < 4; chan++) {
1267             colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
1268                                              s_fpart, t_fpart,
1269                                              neighbors1[0][0][chan],
1270                                              neighbors1[0][1][chan],
1271                                              neighbors1[1][0][chan],
1272                                              neighbors1[1][1][chan]);
1273          }
1274
1275          /* Linearly interpolate the two samples from the two 3D slices */
1276          for (chan = 0; chan < 4; chan++) {
1277             colors_out[chan] = lp_build_lerp(&bld->texel_bld,
1278                                              r_fpart,
1279                                              colors0[chan], colors1[chan]);
1280          }
1281       }
1282       else {
1283          /* 2D tex */
1284          for (chan = 0; chan < 4; chan++) {
1285             colors_out[chan] = colors0[chan];
1286          }
1287       }
1288    }
1289 }
1290
1291
1292 /** Helper used by lp_build_cube_lookup() */
1293 static LLVMValueRef
1294 lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
1295 {
1296    /* ima = -0.5 / abs(coord); */
1297    LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5);
1298    LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
1299    LLVMValueRef ima = lp_build_mul(coord_bld, negHalf,
1300                                    lp_build_rcp(coord_bld, absCoord));
1301    return ima;
1302 }
1303
1304
1305 /**
1306  * Helper used by lp_build_cube_lookup()
1307  * \param sign  scalar +1 or -1
1308  * \param coord  float vector
1309  * \param ima  float vector
1310  */
1311 static LLVMValueRef
1312 lp_build_cube_coord(struct lp_build_context *coord_bld,
1313                     LLVMValueRef sign, int negate_coord,
1314                     LLVMValueRef coord, LLVMValueRef ima)
1315 {
1316    /* return negate(coord) * ima * sign + 0.5; */
1317    LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
1318    LLVMValueRef res;
1319
1320    assert(negate_coord == +1 || negate_coord == -1);
1321
1322    if (negate_coord == -1) {
1323       coord = lp_build_negate(coord_bld, coord);
1324    }
1325
1326    res = lp_build_mul(coord_bld, coord, ima);
1327    if (sign) {
1328       sign = lp_build_broadcast_scalar(coord_bld, sign);
1329       res = lp_build_mul(coord_bld, res, sign);
1330    }
1331    res = lp_build_add(coord_bld, res, half);
1332
1333    return res;
1334 }
1335
1336
1337 /** Helper used by lp_build_cube_lookup()
1338  * Return (major_coord >= 0) ? pos_face : neg_face;
1339  */
1340 static LLVMValueRef
1341 lp_build_cube_face(struct lp_build_sample_context *bld,
1342                    LLVMValueRef major_coord,
1343                    unsigned pos_face, unsigned neg_face)
1344 {
1345    LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
1346                                     major_coord,
1347                                     bld->float_bld.zero, "");
1348    LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0);
1349    LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0);
1350    LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, "");
1351    return res;
1352 }
1353
1354
1355
1356 /**
1357  * Generate code to do cube face selection and per-face texcoords.
1358  */
1359 static void
1360 lp_build_cube_lookup(struct lp_build_sample_context *bld,
1361                      LLVMValueRef s,
1362                      LLVMValueRef t,
1363                      LLVMValueRef r,
1364                      LLVMValueRef *face,
1365                      LLVMValueRef *face_s,
1366                      LLVMValueRef *face_t)
1367 {
1368    struct lp_build_context *float_bld = &bld->float_bld;
1369    struct lp_build_context *coord_bld = &bld->coord_bld;
1370    LLVMValueRef rx, ry, rz;
1371    LLVMValueRef arx, ary, arz;
1372    LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25);
1373    LLVMValueRef arx_ge_ary, arx_ge_arz;
1374    LLVMValueRef ary_ge_arx, ary_ge_arz;
1375    LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;
1376    LLVMValueRef rx_pos, ry_pos, rz_pos;
1377
1378    assert(bld->coord_bld.type.length == 4);
1379
1380    /*
1381     * Use the average of the four pixel's texcoords to choose the face.
1382     */
1383    rx = lp_build_mul(float_bld, c25,
1384                      lp_build_sum_vector(&bld->coord_bld, s));
1385    ry = lp_build_mul(float_bld, c25,
1386                      lp_build_sum_vector(&bld->coord_bld, t));
1387    rz = lp_build_mul(float_bld, c25,
1388                      lp_build_sum_vector(&bld->coord_bld, r));
1389
1390    arx = lp_build_abs(float_bld, rx);
1391    ary = lp_build_abs(float_bld, ry);
1392    arz = lp_build_abs(float_bld, rz);
1393
1394    /*
1395     * Compare sign/magnitude of rx,ry,rz to determine face
1396     */
1397    arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, "");
1398    arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, "");
1399    ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, "");
1400    ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, "");
1401
1402    arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, "");
1403    ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
1404
1405    rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, "");
1406    ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, "");
1407    rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, "");
1408
1409    {
1410       struct lp_build_flow_context *flow_ctx;
1411       struct lp_build_if_state if_ctx;
1412
1413       flow_ctx = lp_build_flow_create(bld->builder);
1414       lp_build_flow_scope_begin(flow_ctx);
1415
1416       *face_s = bld->coord_bld.undef;
1417       *face_t = bld->coord_bld.undef;
1418       *face = bld->int_bld.undef;
1419
1420       lp_build_name(*face_s, "face_s");
1421       lp_build_name(*face_t, "face_t");
1422       lp_build_name(*face, "face");
1423
1424       lp_build_flow_scope_declare(flow_ctx, face_s);
1425       lp_build_flow_scope_declare(flow_ctx, face_t);
1426       lp_build_flow_scope_declare(flow_ctx, face);
1427
1428       lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
1429       {
1430          /* +/- X face */
1431          LLVMValueRef sign = lp_build_sgn(float_bld, rx);
1432          LLVMValueRef ima = lp_build_cube_ima(coord_bld, s);
1433          *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
1434          *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
1435          *face = lp_build_cube_face(bld, rx,
1436                                     PIPE_TEX_FACE_POS_X,
1437                                     PIPE_TEX_FACE_NEG_X);
1438       }
1439       lp_build_else(&if_ctx);
1440       {
1441          struct lp_build_flow_context *flow_ctx2;
1442          struct lp_build_if_state if_ctx2;
1443
1444          LLVMValueRef face_s2 = bld->coord_bld.undef;
1445          LLVMValueRef face_t2 = bld->coord_bld.undef;
1446          LLVMValueRef face2 = bld->int_bld.undef;
1447
1448          flow_ctx2 = lp_build_flow_create(bld->builder);
1449          lp_build_flow_scope_begin(flow_ctx2);
1450          lp_build_flow_scope_declare(flow_ctx2, &face_s2);
1451          lp_build_flow_scope_declare(flow_ctx2, &face_t2);
1452          lp_build_flow_scope_declare(flow_ctx2, &face2);
1453
1454          ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
1455
1456          lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
1457          {
1458             /* +/- Y face */
1459             LLVMValueRef sign = lp_build_sgn(float_bld, ry);
1460             LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
1461             face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
1462             face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
1463             face2 = lp_build_cube_face(bld, ry,
1464                                        PIPE_TEX_FACE_POS_Y,
1465                                        PIPE_TEX_FACE_NEG_Y);
1466          }
1467          lp_build_else(&if_ctx2);
1468          {
1469             /* +/- Z face */
1470             LLVMValueRef sign = lp_build_sgn(float_bld, rz);
1471             LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
1472             face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
1473             face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
1474             face2 = lp_build_cube_face(bld, rz,
1475                                        PIPE_TEX_FACE_POS_Z,
1476                                        PIPE_TEX_FACE_NEG_Z);
1477          }
1478          lp_build_endif(&if_ctx2);
1479          lp_build_flow_scope_end(flow_ctx2);
1480          lp_build_flow_destroy(flow_ctx2);
1481
1482          *face_s = face_s2;
1483          *face_t = face_t2;
1484          *face = face2;
1485       }
1486
1487       lp_build_endif(&if_ctx);
1488       lp_build_flow_scope_end(flow_ctx);
1489       lp_build_flow_destroy(flow_ctx);
1490    }
1491 }
1492
1493
1494
1495 /**
1496  * Sample the texture/mipmap using given image filter and mip filter.
1497  * data0_ptr and data1_ptr point to the two mipmap levels to sample
1498  * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
1499  * If we're using nearest miplevel sampling the '1' values will be null/unused.
1500  */
1501 static void
1502 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1503                        unsigned img_filter,
1504                        unsigned mip_filter,
1505                        LLVMValueRef s,
1506                        LLVMValueRef t,
1507                        LLVMValueRef r,
1508                        LLVMValueRef lod_fpart,
1509                        LLVMValueRef width0_vec,
1510                        LLVMValueRef width1_vec,
1511                        LLVMValueRef height0_vec,
1512                        LLVMValueRef height1_vec,
1513                        LLVMValueRef depth0_vec,
1514                        LLVMValueRef depth1_vec,
1515                        LLVMValueRef row_stride0_vec,
1516                        LLVMValueRef row_stride1_vec,
1517                        LLVMValueRef img_stride0_vec,
1518                        LLVMValueRef img_stride1_vec,
1519                        LLVMValueRef data_ptr0,
1520                        LLVMValueRef data_ptr1,
1521                        LLVMValueRef *colors_out)
1522 {
1523    LLVMValueRef colors0[4], colors1[4];
1524    int chan;
1525
1526    if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1527       lp_build_sample_image_nearest(bld,
1528                                     width0_vec, height0_vec, depth0_vec,
1529                                     row_stride0_vec, img_stride0_vec,
1530                                     data_ptr0, s, t, r, colors0);
1531
1532       if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1533          /* sample the second mipmap level, and interp */
1534          lp_build_sample_image_nearest(bld,
1535                                        width1_vec, height1_vec, depth1_vec,
1536                                        row_stride1_vec, img_stride1_vec,
1537                                        data_ptr1, s, t, r, colors1);
1538       }
1539    }
1540    else {
1541       assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1542
1543       lp_build_sample_image_linear(bld,
1544                                    width0_vec, height0_vec, depth0_vec,
1545                                    row_stride0_vec, img_stride0_vec,
1546                                    data_ptr0, s, t, r, colors0);
1547
1548       if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1549          /* sample the second mipmap level, and interp */
1550          lp_build_sample_image_linear(bld,
1551                                       width1_vec, height1_vec, depth1_vec,
1552                                       row_stride1_vec, img_stride1_vec,
1553                                       data_ptr1, s, t, r, colors1);
1554       }
1555    }
1556
1557    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1558       /* interpolate samples from the two mipmap levels */
1559       for (chan = 0; chan < 4; chan++) {
1560          colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1561                                           colors0[chan], colors1[chan]);
1562       }
1563    }
1564    else {
1565       /* use first/only level's colors */
1566       for (chan = 0; chan < 4; chan++) {
1567          colors_out[chan] = colors0[chan];
1568       }
1569    }
1570 }
1571
1572
1573
1574 /**
1575  * General texture sampling codegen, covering every texture target (1D, 2D,
1576  * 3D, cube) and every min/mag/mip filter combination.  Emits, in order: lod
1577  * (when needed), mip level(s), per-level sizes/strides, data pointers, fetches.
1578  */
1579 static void
1580 lp_build_sample_general(struct lp_build_sample_context *bld,
1581                         unsigned unit, /* forwarded to the mip level helpers */
1582                         LLVMValueRef s, /* texcoord vector; face coord for cubes */
1583                         LLVMValueRef t, /* texcoord vector; face coord for cubes */
1584                         LLVMValueRef r, /* texcoord vector; replaced by the face for cubes */
1585                         const LLVMValueRef *ddx, /* only forwarded to the lod selector */
1586                         const LLVMValueRef *ddy, /* only forwarded to the lod selector */
1587                         LLVMValueRef lodbias, /* only forwarded to the lod selector */
1588                         LLVMValueRef width, /* un-broadcast size, used for lod selection */
1589                         LLVMValueRef height, /* un-broadcast size, used for lod selection */
1590                         LLVMValueRef depth, /* un-broadcast size, used for lod selection */
1591                         LLVMValueRef width_vec, /* size vector, minified per level below */
1592                         LLVMValueRef height_vec, /* size vector, minified when dims >= 2 */
1593                         LLVMValueRef depth_vec, /* size vector, minified when dims == 3 */
1594                         LLVMValueRef row_stride_array, /* indexed by mip level */
1595                         LLVMValueRef img_stride_array, /* indexed by mip level (3D/cube) */
1596                         LLVMValueRef data_array, /* indexed by mip level */
1597                         LLVMValueRef *colors_out) /* [out] four channel values */
1598 {
1599    struct lp_build_context *float_bld = &bld->float_bld;
1600    const unsigned mip_filter = bld->static_state->min_mip_filter;
1601    const unsigned min_filter = bld->static_state->min_img_filter;
1602    const unsigned mag_filter = bld->static_state->mag_img_filter;
1603    const int dims = texture_dims(bld->static_state->target);
1604    LLVMValueRef lod = NULL, lod_fpart = NULL;
1605    LLVMValueRef ilevel0, ilevel1 = NULL, ilevel0_vec, ilevel1_vec = NULL;
1606    LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
1607    LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
1608    LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
1609    LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
1610    LLVMValueRef data_ptr0, data_ptr1 = NULL;
1611    /* The second-level (*1) values are only set when mip_filter is LINEAR. */
1612    /*
1613    printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
1614           mip_filter, min_filter, mag_filter);
1615    */
1616
1617    /*
1618     * Compute the level of detail (float).
1619     */
1620    if (min_filter != mag_filter ||
1621        mip_filter != PIPE_TEX_MIPFILTER_NONE) {
1622       /* Need to compute lod either to choose mipmap levels or to
1623        * distinguish between minification/magnification with one mipmap level.
1624        */
1625       lod = lp_build_lod_selector(bld, s, t, r, ddx, ddy, lodbias,
1626                                   width, height, depth);
1627    }
1628
1629    /*
1630     * Compute integer mipmap level(s) to fetch texels from.
1631     */
1632    if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
1633       /* always use mip level 0 */
1634       ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
1635    }
1636    else {
1637       if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
1638          lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
1639       }
1640       else {
1641          assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR);
1642          lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
1643                                     &lod_fpart);
1644          lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart); /* vector weight for the level lerp */
1645       }
1646    }
1647
1648    /*
1649     * Convert scalar integer mipmap levels into vectors.
1650     */
1651    ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
1652    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
1653       ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
1654
1655    /*
1656     * Compute width, height, depth and strides at mipmap level 'ilevel0'
1657     */
1658    width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
1659    if (dims >= 2) {
1660       height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
1661       row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
1662                                                       ilevel0);
1663       if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
1664          img_stride0_vec = lp_build_get_level_stride_vec(bld,
1665                                                          img_stride_array,
1666                                                          ilevel0);
1667          if (dims == 3) {
1668             depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
1669          }
1670       }
1671    }
1672    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1673       /* compute width, height, depth for second mipmap level at 'ilevel1' */
1674       width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
1675       if (dims >= 2) {
1676          height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
1677          row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
1678                                                          ilevel1);
1679          if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
1680             img_stride1_vec = lp_build_get_level_stride_vec(bld,
1681                                                             img_stride_array,
1682                                                             ilevel1);
1683             if (dims ==3) {
1684                depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
1685             }
1686          }
1687       }
1688    }
1689
1690    /*
1691     * Choose cube face, recompute per-face texcoords.
1692     */
1693    if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1694       LLVMValueRef face, face_s, face_t;
1695       lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
1696       s = face_s; /* vec */
1697       t = face_t; /* vec */
1698       /* use 'r' to indicate cube face */
1699       r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
1700    }
1701
1702    /*
1703     * Get pointer(s) to image data for mipmap level(s).
1704     */
1705    data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
1706    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1707       data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
1708    }
1709
1710    /*
1711     * Get/interpolate texture colors.
1712     */
1713    if (min_filter == mag_filter) {
1714       /* no need to distinguish between minification and magnification */
1715       lp_build_sample_mipmap(bld, min_filter, mip_filter, s, t, r, lod_fpart,
1716                              width0_vec, width1_vec,
1717                              height0_vec, height1_vec,
1718                              depth0_vec, depth1_vec,
1719                              row_stride0_vec, row_stride1_vec,
1720                              img_stride0_vec, img_stride1_vec,
1721                              data_ptr0, data_ptr1,
1722                              colors_out);
1723    }
1724    else {
1725       /* Emit conditional to choose min image filter or mag image filter
1726        * depending on the lod being >= 0 (or NaN) or < 0, respectively.
1727        */
1728       struct lp_build_flow_context *flow_ctx;
1729       struct lp_build_if_state if_ctx;
1730       LLVMValueRef minify;
1731
1732       flow_ctx = lp_build_flow_create(bld->builder);
1733       lp_build_flow_scope_begin(flow_ctx);
1734       /* colors_out[] are written in both branches of the conditional below */
1735       lp_build_flow_scope_declare(flow_ctx, &colors_out[0]);
1736       lp_build_flow_scope_declare(flow_ctx, &colors_out[1]);
1737       lp_build_flow_scope_declare(flow_ctx, &colors_out[2]);
1738       lp_build_flow_scope_declare(flow_ctx, &colors_out[3]);
1739
1740       /* minify = lod >= 0.0; UGE is an unordered compare, so NaN also minifies */
1741       minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
1742                              lod, float_bld->zero, "");
1743
1744       lp_build_if(&if_ctx, flow_ctx, bld->builder, minify);
1745       {
1746          /* Use the minification filter */
1747          lp_build_sample_mipmap(bld, min_filter, mip_filter,
1748                                 s, t, r, lod_fpart,
1749                                 width0_vec, width1_vec,
1750                                 height0_vec, height1_vec,
1751                                 depth0_vec, depth1_vec,
1752                                 row_stride0_vec, row_stride1_vec,
1753                                 img_stride0_vec, img_stride1_vec,
1754                                 data_ptr0, data_ptr1,
1755                                 colors_out);
1756       }
1757       lp_build_else(&if_ctx);
1758       {
1759          /* Use the magnification filter */
1760          lp_build_sample_mipmap(bld, mag_filter, mip_filter,
1761                                 s, t, r, lod_fpart,
1762                                 width0_vec, width1_vec,
1763                                 height0_vec, height1_vec,
1764                                 depth0_vec, depth1_vec,
1765                                 row_stride0_vec, row_stride1_vec,
1766                                 img_stride0_vec, img_stride1_vec,
1767                                 data_ptr0, data_ptr1,
1768                                 colors_out);
1769       }
1770       lp_build_endif(&if_ctx);
1771
1772       lp_build_flow_scope_end(flow_ctx);
1773       lp_build_flow_destroy(flow_ctx);
1774    }
1775 }
1776
1777
1778
1779 /**
1780  * Unpack a vector of 4x8-bit packed pixels: byte i of every element goes
1781  * through lp_build_unsigned_norm_to_float() and is stored in rgba[i].
1782  */
1783 static void
1784 lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
1785                           struct lp_type dst_type,
1786                           LLVMValueRef packed,
1787                           LLVMValueRef *rgba)
1788 {
1789    const LLVMValueRef byte_mask = lp_build_const_int_vec(dst_type, 0xff);
1790    unsigned shift;
1791
1792    for (shift = 0; shift < 32; shift += 8) {
1793       LLVMValueRef bits = packed;
1794
1795       /* bring the wanted byte down to the low bits (nothing to do for byte 0) */
1796       if (shift != 0) {
1797          LLVMValueRef count = lp_build_const_int_vec(dst_type, shift);
1798          bits = LLVMBuildLShr(builder, bits, count, "");
1799       }
1800       /* the topmost byte is already isolated by the logical shift itself */
1801       if (shift + 8 < 32)
1802          bits = LLVMBuildAnd(builder, bits, byte_mask, "");
1803
1804       rgba[shift / 8] = lp_build_unsigned_norm_to_float(builder, 8, dst_type, bits);
1805    }
1806 }
1807
1808 /* Fast path: bilinear 2D fetch of packed 8-bit texels using 8.8 fixed-point AoS math. */
1809 static void
1810 lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
1811                               LLVMValueRef s, /* float texcoord vector */
1812                               LLVMValueRef t, /* float texcoord vector */
1813                               LLVMValueRef width, /* integer size vector */
1814                               LLVMValueRef height, /* integer size vector */
1815                               LLVMValueRef stride_array, /* only the level 0 stride is read */
1816                               LLVMValueRef data_array, /* handed to lp_build_sample_packed() */
1817                               LLVMValueRef *texel) /* [out] four swizzled channel vectors */
1818 {
1819    LLVMBuilderRef builder = bld->builder;
1820    struct lp_build_context i32, h16, u8n;
1821    LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
1822    LLVMValueRef i32_c8, i32_c128, i32_c255;
1823    LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
1824    LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
1825    LLVMValueRef x0, x1;
1826    LLVMValueRef y0, y1;
1827    LLVMValueRef neighbors[2][2];
1828    LLVMValueRef neighbors_lo[2][2];
1829    LLVMValueRef neighbors_hi[2][2];
1830    LLVMValueRef packed, packed_lo, packed_hi;
1831    LLVMValueRef unswizzled[4];
1832    LLVMValueRef stride;
1833    /* Preconditions of this fast path (lp_build_sample_soa tests the same state). */
1834    assert(bld->static_state->target == PIPE_TEXTURE_2D);
1835    assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR);
1836    assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR);
1837    assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE);
1838
1839    lp_build_context_init(&i32, builder, lp_type_int_vec(32));
1840    lp_build_context_init(&h16, builder, lp_type_ufixed(16));
1841    lp_build_context_init(&u8n, builder, lp_type_unorm(8));
1842
1843    i32_vec_type = lp_build_vec_type(i32.type);
1844    h16_vec_type = lp_build_vec_type(h16.type);
1845    u8n_vec_type = lp_build_vec_type(u8n.type);
1846
1847    if (bld->static_state->normalized_coords) { /* scale coords by the texture size */
1848       LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
1849       LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width, coord_vec_type, "");
1850       LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height, coord_vec_type, "");
1851       s = lp_build_mul(&bld->coord_bld, s, fp_width);
1852       t = lp_build_mul(&bld->coord_bld, t, fp_height);
1853    }
1854
1855    /* scale coords by 256 (8 fractional bits) */
1856    s = lp_build_mul_imm(&bld->coord_bld, s, 256);
1857    t = lp_build_mul_imm(&bld->coord_bld, t, 256);
1858
1859    /* convert float to int */
1860    s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
1861    t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
1862
1863    /* subtract 0.5 (add -128) */
1864    i32_c128 = lp_build_const_int_vec(i32.type, -128);
1865    s = LLVMBuildAdd(builder, s, i32_c128, "");
1866    t = LLVMBuildAdd(builder, t, i32_c128, "");
1867
1868    /* compute floor (shift right 8) */
1869    i32_c8 = lp_build_const_int_vec(i32.type, 8);
1870    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
1871    t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
1872
1873    /* compute fractional part (AND with 0xff) */
1874    i32_c255 = lp_build_const_int_vec(i32.type, 255);
1875    s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
1876    t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
1877
1878    x0 = s_ipart;
1879    y0 = t_ipart;
1880    /* second sample column/row; all four indices are wrapped independently below */
1881    x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
1882    y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
1883
1884    x0 = lp_build_sample_wrap_int(bld, x0, width,  bld->static_state->pot_width,
1885                                  bld->static_state->wrap_s);
1886    y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height,
1887                                  bld->static_state->wrap_t);
1888
1889    x1 = lp_build_sample_wrap_int(bld, x1, width,  bld->static_state->pot_width,
1890                                  bld->static_state->wrap_s);
1891    y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height,
1892                                  bld->static_state->wrap_t);
1893
1894    /*
1895     * Transform 4 x i32 in
1896     *
1897     *   s_fpart = {s0, s1, s2, s3}
1898     *
1899     * into 8 x i16 (big-endian element order shown; see 'subindex' below)
1900     *
1901     *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
1902     *
1903     * into two 8 x i16
1904     *
1905     *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
1906     *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
1907     *
1908     * and likewise for t_fpart. There is no risk of losing precision here
1909     * since the fractional parts only use the lower 8 bits.
1910     */
1911
1912    s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
1913    t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
1914
1915    {
1916       LLVMTypeRef elem_type = LLVMInt32Type();
1917       LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
1918       LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
1919       LLVMValueRef shuffle_lo;
1920       LLVMValueRef shuffle_hi;
1921       unsigned i, j;
1922
1923       for(j = 0; j < h16.type.length; j += 4) {
1924          unsigned subindex = util_cpu_caps.little_endian ? 0 : 1; /* which i16 half of each i32 holds the fraction */
1925          LLVMValueRef index;
1926
1927          index = LLVMConstInt(elem_type, j/2 + subindex, 0);
1928          for(i = 0; i < 4; ++i)
1929             shuffles_lo[j + i] = index;
1930
1931          index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
1932          for(i = 0; i < 4; ++i)
1933             shuffles_hi[j + i] = index;
1934       }
1935
1936       shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
1937       shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
1938
1939       s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, "");
1940       t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, "");
1941       s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, "");
1942       t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
1943    }
1944    /* this path never mipmaps (asserted above), so only level 0 is addressed */
1945    stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0);
1946
1947    /*
1948     * Fetch the pixels as 4 x 32bit (rgba order might differ):
1949     *
1950     *   rgba0 rgba1 rgba2 rgba3
1951     *
1952     * bit cast them into 16 x u8
1953     *
1954     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
1955     *
1956     * unpack them into two 8 x i16:
1957     *
1958     *   r0 g0 b0 a0 r1 g1 b1 a1
1959     *   r2 g2 b2 a2 r3 g3 b3 a3
1960     *
1961     * The higher 8 bits of the resulting elements will be zero.
1962     */
1963
1964    neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array);
1965    neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array);
1966    neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array);
1967    neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array);
1968
1969    neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
1970    neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
1971    neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
1972    neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
1973
1974    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
1975    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
1976    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
1977    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
1978
1979    /*
1980     * Linear interpolate with 8.8 fixed point (low and high halves separately).
1981     */
1982
1983    packed_lo = lp_build_lerp_2d(&h16,
1984                                 s_fpart_lo, t_fpart_lo,
1985                                 neighbors_lo[0][0],
1986                                 neighbors_lo[0][1],
1987                                 neighbors_lo[1][0],
1988                                 neighbors_lo[1][1]);
1989
1990    packed_hi = lp_build_lerp_2d(&h16,
1991                                 s_fpart_hi, t_fpart_hi,
1992                                 neighbors_hi[0][0],
1993                                 neighbors_hi[0][1],
1994                                 neighbors_hi[1][0],
1995                                 neighbors_hi[1][1]);
1996
1997    packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi);
1998
1999    /*
2000     * Convert to SoA and swizzle.
2001     */
2002
2003    packed = LLVMBuildBitCast(builder, packed, i32_vec_type, ""); /* reinterpret the u8 result as i32 elements */
2004
2005    lp_build_rgba8_to_f32_soa(bld->builder,
2006                              bld->texel_type,
2007                              packed, unswizzled);
2008
2009    lp_build_format_swizzle_soa(bld->format_desc,
2010                                bld->texel_type, unswizzled,
2011                                texel);
2012
2013    lp_build_swizzle_soa(bld, texel);
2014 }
2015
2016
2017 /**
2018  * Apply the sampler's compare function to already fetched texels, in place.
2019  * Does nothing for PIPE_TEX_COMPARE_NONE.  Otherwise each of the four
2020  * channels is compared against 'p' and turned into one or zero, the four
2021  * results are averaged, and the average replaces texel[0..2]; texel[3] = one.
2022  */
2023 static void
2024 lp_build_sample_compare(struct lp_build_sample_context *bld,
2025                         LLVMValueRef p,
2026                         LLVMValueRef *texel)
2027 {
2028    struct lp_build_context *tb = &bld->texel_bld;
2029    const unsigned func = bld->static_state->compare_func;
2030    LLVMValueRef sum = NULL;
2031    unsigned i;
2032
2033    if (bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE)
2034       return;
2035
2036    /* TODO: Compare before swizzling, to avoid redundant computations */
2037    for (i = 0; i < 4; i++) {
2038       LLVMValueRef pass = lp_build_cmp(tb, func, p, texel[i]);
2039       pass = lp_build_select(tb, pass, tb->one, tb->zero);
2040       sum = sum ? lp_build_add(tb, sum, pass) : pass;
2041    }
2042
2043    assert(sum);
2044    sum = lp_build_mul(tb, sum, lp_build_const_vec(tb->type, 0.25));
2045
2046    /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
2047    texel[0] = texel[1] = texel[2] = sum;
2048    texel[3] = tb->one;
2049 }
2050
2051
2052 /**
2053  * Debug stand-in for real sampling: no texture is read, and each of the four
2054  * output channels is set to the texel builder's constant one (white).
2055  */
2056 static void
2057 lp_build_sample_nop(struct lp_build_sample_context *bld,
2058                     LLVMValueRef *texel)
2059 {
2060    const LLVMValueRef white = bld->texel_bld.one;
2061    LLVMValueRef *const end = texel + 4;
2062
2063    while (texel != end) {
2064       /* overwrite this channel with the constant */
2065       *texel++ = white;
2066    }
2067 }
2068
2069
2070 /**
2071  * Build texture sampling code for sampler 'unit'.
2072  * 'texel' will return a vector of four LLVMValueRefs corresponding to
2073  * R, G, B, A (after the compare step applied at the end of this function).
2074  * \param type  vector float type to use for coords, etc. (also the texel type)
2075  */
2076 void
2077 lp_build_sample_soa(LLVMBuilderRef builder,
2078                     const struct lp_sampler_static_state *static_state,
2079                     struct lp_sampler_dynamic_state *dynamic_state,
2080                     struct lp_type type,
2081                     unsigned unit, /* passed to every dynamic_state callback */
2082                     unsigned num_coords, /* not referenced: coords[0..2] are always read */
2083                     const LLVMValueRef *coords, /* s, t, r in elements 0, 1, 2 */
2084                     const LLVMValueRef *ddx, /* only used on the general path */
2085                     const LLVMValueRef *ddy, /* only used on the general path */
2086                     LLVMValueRef lodbias, /* only used on the general path */
2087                     LLVMValueRef *texel) /* [out] four channel values */
2088 {
2089    struct lp_build_sample_context bld;
2090    LLVMValueRef width, width_vec;
2091    LLVMValueRef height, height_vec;
2092    LLVMValueRef depth, depth_vec;
2093    LLVMValueRef row_stride_array, img_stride_array;
2094    LLVMValueRef data_array;
2095    LLVMValueRef s;
2096    LLVMValueRef t;
2097    LLVMValueRef r;
2098
2099    /* Setup our build context */
2100    memset(&bld, 0, sizeof bld);
2101    bld.builder = builder;
2102    bld.static_state = static_state;
2103    bld.dynamic_state = dynamic_state;
2104    bld.format_desc = util_format_description(static_state->format);
2105    /* float/int are 32-bit scalar types; the coord and texel types derive from 'type' */
2106    bld.float_type = lp_type_float(32);
2107    bld.int_type = lp_type_int(32);
2108    bld.coord_type = type;
2109    bld.uint_coord_type = lp_uint_type(type);
2110    bld.int_coord_type = lp_int_type(type);
2111    bld.texel_type = type;
2112
2113    lp_build_context_init(&bld.float_bld, builder, bld.float_type);
2114    lp_build_context_init(&bld.int_bld, builder, bld.int_type);
2115    lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
2116    lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
2117    lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
2118    lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
2119
2120    /* Get the dynamic state */
2121    width = dynamic_state->width(dynamic_state, builder, unit);
2122    height = dynamic_state->height(dynamic_state, builder, unit);
2123    depth = dynamic_state->depth(dynamic_state, builder, unit);
2124    row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
2125    img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit);
2126    data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
2127    /* Note that data_array is an array[level] of pointers to texture images */
2128
2129    s = coords[0];
2130    t = coords[1];
2131    r = coords[2];
2132    /* Broadcast the sizes to vectors of the unsigned coord type. */
2133    width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
2134    height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
2135    depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
2136
2137    if (0) {
2138       /* For debug: no-op texture sampling */
2139       lp_build_sample_nop(&bld, texel);
2140    }
2141    else if (util_format_is_rgba8_variant(bld.format_desc) &&
2142             static_state->target == PIPE_TEXTURE_2D &&
2143             static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
2144             static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
2145             static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
2146             is_simple_wrap_mode(static_state->wrap_s) &&
2147             is_simple_wrap_mode(static_state->wrap_t)) {
2148       /* special case: rgba8 variant, 2D, bilinear, no mipmapping, simple wraps */
2149       lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
2150                                     row_stride_array, data_array, texel);
2151    }
2152    else {
2153       lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy, lodbias,
2154                               width, height, depth,
2155                               width_vec, height_vec, depth_vec,
2156                               row_stride_array, img_stride_array,
2157                               data_array,
2158                               texel);
2159    }
2160    /* Runs on every path; returns at once for PIPE_TEX_COMPARE_NONE. 'r' is coords[2]. */
2161    lp_build_sample_compare(&bld, r, texel);
2162 }