src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * Texture sampling -- SoA.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  * @author Brian Paul <brianp@vmware.com>
  34  */
  35
  36 #include "pipe/p_defines.h"
  37 #include "pipe/p_state.h"
  38 #include "util/u_debug.h"
  39 #include "util/u_dump.h"
  40 #include "util/u_memory.h"
  41 #include "util/u_math.h"
  42 #include "util/u_format.h"
  43 #include "lp_bld_debug.h"
  44 #include "lp_bld_type.h"
  45 #include "lp_bld_const.h"
  46 #include "lp_bld_conv.h"
  47 #include "lp_bld_arit.h"
  48 #include "lp_bld_logic.h"
  49 #include "lp_bld_swizzle.h"
  50 #include "lp_bld_pack.h"
  51 #include "lp_bld_flow.h"
  52 #include "lp_bld_gather.h"
  53 #include "lp_bld_format.h"
  54 #include "lp_bld_sample.h"
  55 #include "lp_bld_quad.h"
  56
  57
/**
 * Keep all information for sampling code generation in a single place.
 *
 * Each lp_type below is paired with the lp_build_context that the code in
 * this file uses when emitting operations on values of that type.
 */
struct lp_build_sample_context
{
   /** LLVM builder through which all sampling instructions are emitted */
   LLVMBuilderRef builder;

   /**
    * Sampler/texture state fixed at code-generation time (texture target,
    * wrap modes, swizzles, border color, normalized_coords, ...).
    */
   const struct lp_sampler_static_state *static_state;

   /** Dynamic sampler state -- NOTE(review): not referenced in the code seen here */
   struct lp_sampler_dynamic_state *dynamic_state;

   /** Texture format description, handed to the offset and texel fetch helpers */
   const struct util_format_description *format_desc;

   /** regular scalar float type */
   struct lp_type float_type;
   struct lp_build_context float_bld;

   /** regular scalar int type */
   struct lp_type int_type;
   struct lp_build_context int_bld;

   /** Incoming coordinates type and build context */
   struct lp_type coord_type;
   struct lp_build_context coord_bld;

   /** Unsigned integer coordinates */
   struct lp_type uint_coord_type;
   struct lp_build_context uint_coord_bld;

   /** Signed integer coordinates */
   struct lp_type int_coord_type;
   struct lp_build_context int_coord_bld;

   /** Output texels type and build context */
   struct lp_type texel_type;
   struct lp_build_context texel_bld;
};
  95
  96
  97 /**
  98  * Does the given texture wrap mode allow sampling the texture border color?
  99  * XXX maybe move this into gallium util code.
 100  */
 101 static boolean
 102 wrap_mode_uses_border_color(unsigned mode)
 103 {
 104    switch (mode) {
 105    case PIPE_TEX_WRAP_REPEAT:
 106    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 107    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 108    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 109       return FALSE;
 110    case PIPE_TEX_WRAP_CLAMP:
 111    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 112    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 113    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 114       return TRUE;
 115    default:
 116       assert(0 && "unexpected wrap mode");
 117       return FALSE;
 118    }
 119 }
 120
 121
 122 static LLVMValueRef
 123 lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
 124                           LLVMValueRef data_array, LLVMValueRef level)
 125 {
 126    LLVMValueRef indexes[2], data_ptr;
 127    indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
 128    indexes[1] = level;
 129    data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
 130    data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
 131    return data_ptr;
 132 }
 133
 134
 135 static LLVMValueRef
 136 lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
 137                                 LLVMValueRef data_array, int level)
 138 {
 139    LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
 140    return lp_build_get_mipmap_level(bld, data_array, lvl);
 141 }
 142
 143
 144 /**
 145  * Dereference stride_array[mipmap_level] array to get a stride.
 146  * Return stride as a vector.
 147  */
 148 static LLVMValueRef
 149 lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
 150                               LLVMValueRef stride_array, LLVMValueRef level)
 151 {
 152    LLVMValueRef indexes[2], stride;
 153    indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
 154    indexes[1] = level;
 155    stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, "");
 156    stride = LLVMBuildLoad(bld->builder, stride, "");
 157    stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
 158    return stride;
 159 }
 160
 161
 162 /** Dereference stride_array[0] array to get a stride (as vector). */
 163 static LLVMValueRef
 164 lp_build_get_const_level_stride_vec(struct lp_build_sample_context *bld,
 165                                     LLVMValueRef stride_array, int level)
 166 {
 167    LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
 168    return lp_build_get_level_stride_vec(bld, stride_array, lvl);
 169 }
 170
 171
 172 static int
 173 texture_dims(enum pipe_texture_target tex)
 174 {
 175    switch (tex) {
 176    case PIPE_TEXTURE_1D:
 177       return 1;
 178    case PIPE_TEXTURE_2D:
 179    case PIPE_TEXTURE_RECT:
 180    case PIPE_TEXTURE_CUBE:
 181       return 2;
 182    case PIPE_TEXTURE_3D:
 183       return 3;
 184    default:
 185       assert(0 && "bad texture target in texture_dims()");
 186       return 2;
 187    }
 188 }
 189
 190
 191 static void
 192 apply_sampler_swizzle(struct lp_build_sample_context *bld,
 193                       LLVMValueRef *texel)
 194 {
 195    unsigned char swizzles[4];
 196
 197    swizzles[0] = bld->static_state->swizzle_r;
 198    swizzles[1] = bld->static_state->swizzle_g;
 199    swizzles[2] = bld->static_state->swizzle_b;
 200    swizzles[3] = bld->static_state->swizzle_a;
 201
 202    lp_build_swizzle_soa_inplace(&bld->texel_bld, texel, swizzles);
 203 }
 204
 205
 206
 207 /**
 208  * Generate code to fetch a texel from a texture at int coords (x, y, z).
 209  * The computation depends on whether the texture is 1D, 2D or 3D.
 210  * The result, texel, will be:
 211  *   texel[0] = red values
 212  *   texel[1] = green values
 213  *   texel[2] = blue values
 214  *   texel[3] = alpha values
 215  */
 216 static void
 217 lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
 218                           LLVMValueRef width,
 219                           LLVMValueRef height,
 220                           LLVMValueRef depth,
 221                           LLVMValueRef x,
 222                           LLVMValueRef y,
 223                           LLVMValueRef z,
 224                           LLVMValueRef y_stride,
 225                           LLVMValueRef z_stride,
 226                           LLVMValueRef data_ptr,
 227                           LLVMValueRef texel_out[4])
 228 {
 229    const int dims = texture_dims(bld->static_state->target);
 230    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 231    LLVMValueRef offset;
 232    LLVMValueRef i, j;
 233    LLVMValueRef use_border = NULL;
 234
 235    /* use_border = x < 0 || x >= width || y < 0 || y >= height */
 236    if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) {
 237       LLVMValueRef b1, b2;
 238       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
 239       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
 240       use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
 241    }
 242
 243    if (dims >= 2 && wrap_mode_uses_border_color(bld->static_state->wrap_t)) {
 244       LLVMValueRef b1, b2;
 245       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
 246       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
 247       if (use_border) {
 248          use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
 249          use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
 250       }
 251       else {
 252          use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
 253       }
 254    }
 255
 256    if (dims == 3 && wrap_mode_uses_border_color(bld->static_state->wrap_r)) {
 257       LLVMValueRef b1, b2;
 258       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
 259       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
 260       if (use_border) {
 261          use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
 262          use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
 263       }
 264       else {
 265          use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
 266       }
 267    }
 268
 269    /* convert x,y,z coords to linear offset from start of texture, in bytes */
 270    lp_build_sample_offset(&bld->uint_coord_bld,
 271                           bld->format_desc,
 272                           x, y, z, y_stride, z_stride,
 273                           &offset, &i, &j);
 274
 275    if (use_border) {
 276       /* If we can sample the border color, it means that texcoords may
 277        * lie outside the bounds of the texture image.  We need to do
 278        * something to prevent reading out of bounds and causing a segfault.
 279        *
 280        * Simply AND the texture coords with !use_border.  This will cause
 281        * coords which are out of bounds to become zero.  Zero's guaranteed
 282        * to be inside the texture image.
 283        */
 284       offset = lp_build_andc(&bld->uint_coord_bld, offset, use_border);
 285    }
 286
 287    lp_build_fetch_rgba_soa(bld->builder,
 288                            bld->format_desc,
 289                            bld->texel_type,
 290                            data_ptr, offset,
 291                            i, j,
 292                            texel_out);
 293
 294    apply_sampler_swizzle(bld, texel_out);
 295
 296    /*
 297     * Note: if we find an app which frequently samples the texture border
 298     * we might want to implement a true conditional here to avoid sampling
 299     * the texture whenever possible (since that's quite a bit of code).
 300     * Ex:
 301     *   if (use_border) {
 302     *      texel = border_color;
 303     *   }
 304     *   else {
 305     *      texel = sample_texture(coord);
 306     *   }
 307     * As it is now, we always sample the texture, then selectively replace
 308     * the texel color results with the border color.
 309     */
 310
 311    if (use_border) {
 312       /* select texel color or border color depending on use_border */
 313       int chan;
 314       for (chan = 0; chan < 4; chan++) {
 315          LLVMValueRef border_chan =
 316             lp_build_const_vec(bld->texel_type,
 317                                   bld->static_state->border_color[chan]);
 318          texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
 319                                            border_chan, texel_out[chan]);
 320       }
 321    }
 322 }
 323
 324
 325 /**
 326  * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
 327  */
 328 static LLVMValueRef
 329 lp_build_coord_mirror(struct lp_build_sample_context *bld,
 330                       LLVMValueRef coord)
 331 {
 332    struct lp_build_context *coord_bld = &bld->coord_bld;
 333    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 334    LLVMValueRef fract, flr, isOdd;
 335
 336    /* fract = coord - floor(coord) */
 337    fract = lp_build_sub(coord_bld, coord, lp_build_floor(coord_bld, coord));
 338
 339    /* flr = ifloor(coord); */
 340    flr = lp_build_ifloor(coord_bld, coord);
 341
 342    /* isOdd = flr & 1 */
 343    isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, "");
 344
 345    /* make coord positive or negative depending on isOdd */
 346    coord = lp_build_set_sign(coord_bld, fract, isOdd);
 347
 348    /* convert isOdd to float */
 349    isOdd = lp_build_int_to_float(coord_bld, isOdd);
 350
 351    /* add isOdd to coord */
 352    coord = lp_build_add(coord_bld, coord, isOdd);
 353
 354    return coord;
 355 }
 356
 357
 358 /**
 359  * We only support a few wrap modes in lp_build_sample_wrap_linear_int() at this time.
 360  * Return whether the given mode is supported by that function.
 361  */
 362 static boolean
 363 is_simple_wrap_mode(unsigned mode)
 364 {
 365    switch (mode) {
 366    case PIPE_TEX_WRAP_REPEAT:
 367    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 368       return TRUE;
 369    default:
 370       return FALSE;
 371    }
 372 }
 373
 374
 375 /**
 376  * Build LLVM code for texture wrap mode, for scaled integer texcoords.
 377  * \param coord  the incoming texcoord (s,t,r or q) scaled to the texture size
 378  * \param length  the texture size along one dimension
 379  * \param is_pot  if TRUE, length is a power of two
 380  * \param wrap_mode  one of PIPE_TEX_WRAP_x
 381  * \param i0  resulting sub-block pixel coordinate for coord0
 382  */
 383 static void
 384 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
 385                                  unsigned block_length,
 386                                  LLVMValueRef coord,
 387                                  LLVMValueRef length,
 388                                  LLVMValueRef stride,
 389                                  boolean is_pot,
 390                                  unsigned wrap_mode,
 391                                  LLVMValueRef *out_offset,
 392                                  LLVMValueRef *out_i)
 393 {
 394    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 395    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 396    LLVMValueRef length_minus_one;
 397
 398    length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
 399
 400    switch(wrap_mode) {
 401    case PIPE_TEX_WRAP_REPEAT:
 402       if(is_pot)
 403          coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
 404       else
 405          /* Signed remainder won't give the right results for negative
 406           * dividends but unsigned remainder does.*/
 407          coord = LLVMBuildURem(bld->builder, coord, length, "");
 408       break;
 409
 410    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 411       coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
 412       coord = lp_build_min(int_coord_bld, coord, length_minus_one);
 413       break;
 414
 415    case PIPE_TEX_WRAP_CLAMP:
 416    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 417    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 418    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 419    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 420    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 421    default:
 422       assert(0);
 423    }
 424
 425    lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
 426                                   out_offset, out_i);
 427 }
 428
 429
 430 /**
 431  * Build LLVM code for texture wrap mode, for scaled integer texcoords.
 432  * \param coord0  the incoming texcoord (s,t,r or q) scaled to the texture size
 433  * \param length  the texture size along one dimension
 434  * \param stride  pixel stride along the coordinate axis
 435  * \param block_length  is the length of the pixel block along the
 436  *                      coordinate axis
 437  * \param is_pot  if TRUE, length is a power of two
 438  * \param wrap_mode  one of PIPE_TEX_WRAP_x
 439  * \param offset0  resulting relative offset for coord0
 440  * \param offset1  resulting relative offset for coord0 + 1
 441  * \param i0  resulting sub-block pixel coordinate for coord0
 442  * \param i1  resulting sub-block pixel coordinate for coord0 + 1
 443  */
 444 static void
 445 lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
 446                                 unsigned block_length,
 447                                 LLVMValueRef coord0,
 448                                 LLVMValueRef length,
 449                                 LLVMValueRef stride,
 450                                 boolean is_pot,
 451                                 unsigned wrap_mode,
 452                                 LLVMValueRef *offset0,
 453                                 LLVMValueRef *offset1,
 454                                 LLVMValueRef *i0,
 455                                 LLVMValueRef *i1)
 456 {
 457    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 458    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 459    LLVMValueRef length_minus_one;
 460    LLVMValueRef lmask, umask, mask;
 461
 462    if (block_length != 1) {
 463       /*
 464        * If the pixel block covers more than one pixel then there is no easy
 465        * way to calculate offset1 relative to offset0. Instead, compute them
 466        * independently.
 467        */
 468
 469       LLVMValueRef coord1;
 470
 471       lp_build_sample_wrap_nearest_int(bld,
 472                                        block_length,
 473                                        coord0,
 474                                        length,
 475                                        stride,
 476                                        is_pot,
 477                                        wrap_mode,
 478                                        offset0, i0);
 479
 480       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 481
 482       lp_build_sample_wrap_nearest_int(bld,
 483                                        block_length,
 484                                        coord1,
 485                                        length,
 486                                        stride,
 487                                        is_pot,
 488                                        wrap_mode,
 489                                        offset1, i1);
 490
 491       return;
 492    }
 493
 494    /*
 495     * Scalar pixels -- try to compute offset0 and offset1 with a single stride
 496     * multiplication.
 497     */
 498
 499    *i0 = uint_coord_bld->zero;
 500    *i1 = uint_coord_bld->zero;
 501
 502    length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
 503
 504    switch(wrap_mode) {
 505    case PIPE_TEX_WRAP_REPEAT:
 506       if (is_pot) {
 507          coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
 508       }
 509       else {
 510          /* Signed remainder won't give the right results for negative
 511           * dividends but unsigned remainder does.*/
 512          coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
 513       }
 514
 515       mask = lp_build_compare(bld->builder, int_coord_bld->type,
 516                               PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
 517
 518       *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
 519       *offset1 = LLVMBuildAnd(bld->builder,
 520                               lp_build_add(uint_coord_bld, *offset0, stride),
 521                               mask, "");
 522       break;
 523
 524    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 525       lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
 526                                PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
 527       umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
 528                                PIPE_FUNC_LESS, coord0, length_minus_one);
 529
 530       coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
 531       coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
 532
 533       mask = LLVMBuildAnd(bld->builder, lmask, umask, "");
 534
 535       *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
 536       *offset1 = lp_build_add(uint_coord_bld,
 537                               *offset0,
 538                               LLVMBuildAnd(bld->builder, stride, mask, ""));
 539       break;
 540
 541    case PIPE_TEX_WRAP_CLAMP:
 542    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 543    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 544    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 545    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 546    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 547    default:
 548       assert(0);
 549       *offset0 = uint_coord_bld->zero;
 550       *offset1 = uint_coord_bld->zero;
 551       break;
 552    }
 553 }
 554
 555
 556 /**
 557  * Build LLVM code for texture wrap mode for linear filtering.
 558  * \param x0_out  returns first integer texcoord
 559  * \param x1_out  returns second integer texcoord
 560  * \param weight_out  returns linear interpolation weight
 561  */
 562 static void
 563 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
 564                             LLVMValueRef coord,
 565                             LLVMValueRef length,
 566                             boolean is_pot,
 567                             unsigned wrap_mode,
 568                             LLVMValueRef *x0_out,
 569                             LLVMValueRef *x1_out,
 570                             LLVMValueRef *weight_out)
 571 {
 572    struct lp_build_context *coord_bld = &bld->coord_bld;
 573    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 574    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 575    LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
 576    LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
 577    LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
 578    LLVMValueRef coord0, coord1, weight;
 579
 580    switch(wrap_mode) {
 581    case PIPE_TEX_WRAP_REPEAT:
 582       /* mul by size and subtract 0.5 */
 583       coord = lp_build_mul(coord_bld, coord, length_f);
 584       coord = lp_build_sub(coord_bld, coord, half);
 585       /* convert to int */
 586       coord0 = lp_build_ifloor(coord_bld, coord);
 587       coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one);
 588       /* compute lerp weight */
 589       weight = lp_build_fract(coord_bld, coord);
 590       /* repeat wrap */
 591       if (is_pot) {
 592          coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
 593          coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, "");
 594       }
 595       else {
 596          /* Signed remainder won't give the right results for negative
 597           * dividends but unsigned remainder does.*/
 598          coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
 599          coord1 = LLVMBuildURem(bld->builder, coord1, length, "");
 600       }
 601       break;
 602
 603    case PIPE_TEX_WRAP_CLAMP:
 604       if (bld->static_state->normalized_coords) {
 605          /* scale coord to length */
 606          coord = lp_build_mul(coord_bld, coord, length_f);
 607       }
 608
 609       /* clamp to [0, length] */
 610       coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);
 611
 612       coord = lp_build_sub(coord_bld, coord, half);
 613
 614       weight = lp_build_fract(coord_bld, coord);
 615       coord0 = lp_build_ifloor(coord_bld, coord);
 616       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 617       break;
 618
 619    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 620       if (bld->static_state->normalized_coords) {
 621          /* clamp to [0,1] */
 622          coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one);
 623          /* mul by tex size and subtract 0.5 */
 624          coord = lp_build_mul(coord_bld, coord, length_f);
 625          coord = lp_build_sub(coord_bld, coord, half);
 626       }
 627       else {
 628          LLVMValueRef min, max;
 629          /* clamp to [0.5, length - 0.5] */
 630          min = half;
 631          max = lp_build_sub(coord_bld, length_f, min);
 632          coord = lp_build_clamp(coord_bld, coord, min, max);
 633       }
 634       /* compute lerp weight */
 635       weight = lp_build_fract(coord_bld, coord);
 636       /* coord0 = floor(coord); */
 637       coord0 = lp_build_ifloor(coord_bld, coord);
 638       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 639       /* coord0 = max(coord0, 0) */
 640       coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
 641       /* coord1 = min(coord1, length-1) */
 642       coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
 643       break;
 644
 645    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 646       {
 647          LLVMValueRef min, max;
 648          if (bld->static_state->normalized_coords) {
 649             /* scale coord to length */
 650             coord = lp_build_mul(coord_bld, coord, length_f);
 651          }
 652          /* clamp to [-0.5, length + 0.5] */
 653          min = lp_build_const_vec(coord_bld->type, -0.5F);
 654          max = lp_build_sub(coord_bld, length_f, min);
 655          coord = lp_build_clamp(coord_bld, coord, min, max);
 656          coord = lp_build_sub(coord_bld, coord, half);
 657          /* compute lerp weight */
 658          weight = lp_build_fract(coord_bld, coord);
 659          /* convert to int */
 660          coord0 = lp_build_ifloor(coord_bld, coord);
 661          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 662       }
 663       break;
 664
 665    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 666       /* compute mirror function */
 667       coord = lp_build_coord_mirror(bld, coord);
 668
 669       /* scale coord to length */
 670       coord = lp_build_mul(coord_bld, coord, length_f);
 671       coord = lp_build_sub(coord_bld, coord, half);
 672
 673       /* compute lerp weight */
 674       weight = lp_build_fract(coord_bld, coord);
 675
 676       /* convert to int coords */
 677       coord0 = lp_build_ifloor(coord_bld, coord);
 678       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 679
 680       /* coord0 = max(coord0, 0) */
 681       coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
 682       /* coord1 = min(coord1, length-1) */
 683       coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
 684       break;
 685
 686    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 687       coord = lp_build_abs(coord_bld, coord);
 688
 689       if (bld->static_state->normalized_coords) {
 690          /* scale coord to length */
 691          coord = lp_build_mul(coord_bld, coord, length_f);
 692       }
 693
 694       /* clamp to [0, length] */
 695       coord = lp_build_min(coord_bld, coord, length_f);
 696
 697       coord = lp_build_sub(coord_bld, coord, half);
 698
 699       weight = lp_build_fract(coord_bld, coord);
 700       coord0 = lp_build_ifloor(coord_bld, coord);
 701       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 702       break;
 703
 704    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 705       {
 706          LLVMValueRef min, max;
 707
 708          coord = lp_build_abs(coord_bld, coord);
 709
 710          if (bld->static_state->normalized_coords) {
 711             /* scale coord to length */
 712             coord = lp_build_mul(coord_bld, coord, length_f);
 713          }
 714
 715          /* clamp to [0.5, length - 0.5] */
 716          min = half;
 717          max = lp_build_sub(coord_bld, length_f, min);
 718          coord = lp_build_clamp(coord_bld, coord, min, max);
 719
 720          coord = lp_build_sub(coord_bld, coord, half);
 721
 722          weight = lp_build_fract(coord_bld, coord);
 723          coord0 = lp_build_ifloor(coord_bld, coord);
 724          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 725       }
 726       break;
 727
 728    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 729       {
 730          LLVMValueRef min, max;
 731
 732          coord = lp_build_abs(coord_bld, coord);
 733
 734          if (bld->static_state->normalized_coords) {
 735             /* scale coord to length */
 736             coord = lp_build_mul(coord_bld, coord, length_f);
 737          }
 738
 739          /* clamp to [-0.5, length + 0.5] */
 740          min = lp_build_negate(coord_bld, half);
 741          max = lp_build_sub(coord_bld, length_f, min);
 742          coord = lp_build_clamp(coord_bld, coord, min, max);
 743
 744          coord = lp_build_sub(coord_bld, coord, half);
 745
 746          weight = lp_build_fract(coord_bld, coord);
 747          coord0 = lp_build_ifloor(coord_bld, coord);
 748          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 749       }
 750       break;
 751
 752    default:
 753       assert(0);
 754       coord0 = NULL;
 755       coord1 = NULL;
 756       weight = NULL;
 757    }
 758
 759    *x0_out = coord0;
 760    *x1_out = coord1;
 761    *weight_out = weight;
 762 }
 763
 764
 765 /**
 766  * Build LLVM code for texture wrap mode for nearest filtering.
 767  * \param coord  the incoming texcoord (nominally in [0,1])
 768  * \param length  the texture size along one dimension, as int
 769  * \param is_pot  if TRUE, length is a power of two
 770  * \param wrap_mode  one of PIPE_TEX_WRAP_x
 771  */
 772 static LLVMValueRef
 773 lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
 774                              LLVMValueRef coord,
 775                              LLVMValueRef length,
 776                              boolean is_pot,
 777                              unsigned wrap_mode)
 778 {
 779    struct lp_build_context *coord_bld = &bld->coord_bld;
 780    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 781    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 782    LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
 783    LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
 784    LLVMValueRef icoord;
 785
 786    switch(wrap_mode) {
 787    case PIPE_TEX_WRAP_REPEAT:
 788       coord = lp_build_mul(coord_bld, coord, length_f);
 789       icoord = lp_build_ifloor(coord_bld, coord);
 790       if (is_pot)
 791          icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, "");
 792       else
 793          /* Signed remainder won't give the right results for negative
 794           * dividends but unsigned remainder does.*/
 795          icoord = LLVMBuildURem(bld->builder, icoord, length, "");
 796       break;
 797
 798    case PIPE_TEX_WRAP_CLAMP:
 799    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 800       if (bld->static_state->normalized_coords) {
 801          /* scale coord to length */
 802          coord = lp_build_mul(coord_bld, coord, length_f);
 803       }
 804
 805       /* floor */
 806       icoord = lp_build_ifloor(coord_bld, coord);
 807
 808       /* clamp to [0, length - 1]. */
 809       icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
 810                               length_minus_one);
 811       break;
 812
 813    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 814       /* Note: this is the same as CLAMP_TO_EDGE, except min = -min */
 815       {
 816          LLVMValueRef min, max;
 817
 818          if (bld->static_state->normalized_coords) {
 819             /* scale coord to length */
 820             coord = lp_build_mul(coord_bld, coord, length_f);
 821          }
 822
 823          icoord = lp_build_ifloor(coord_bld, coord);
 824
 825          /* clamp to [-1, length] */
 826          min = lp_build_negate(int_coord_bld, int_coord_bld->one);
 827          max = length;
 828          icoord = lp_build_clamp(int_coord_bld, icoord, min, max);
 829       }
 830       break;
 831
 832    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 833       /* compute mirror function */
 834       coord = lp_build_coord_mirror(bld, coord);
 835
 836       /* scale coord to length */
 837       assert(bld->static_state->normalized_coords);
 838       coord = lp_build_mul(coord_bld, coord, length_f);
 839
 840       icoord = lp_build_ifloor(coord_bld, coord);
 841
 842       /* clamp to [0, length - 1] */
 843       icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
 844       break;
 845
 846    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 847    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 848       coord = lp_build_abs(coord_bld, coord);
 849
 850       if (bld->static_state->normalized_coords) {
 851          /* scale coord to length */
 852          coord = lp_build_mul(coord_bld, coord, length_f);
 853       }
 854
 855       icoord = lp_build_ifloor(coord_bld, coord);
 856
 857       /* clamp to [0, length - 1] */
 858       icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
 859       break;
 860
 861    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 862       coord = lp_build_abs(coord_bld, coord);
 863
 864       if (bld->static_state->normalized_coords) {
 865          /* scale coord to length */
 866          coord = lp_build_mul(coord_bld, coord, length_f);
 867       }
 868
 869       icoord = lp_build_ifloor(coord_bld, coord);
 870
 871       /* clamp to [0, length] */
 872       icoord = lp_build_min(int_coord_bld, icoord, length);
 873       break;
 874
 875    default:
 876       assert(0);
 877       icoord = NULL;
 878    }
 879
 880    return icoord;
 881 }
 882
 883
 884 /**
 885  * Codegen equivalent for u_minify().
 886  * Return max(1, base_size >> level);
 887  */
 888 static LLVMValueRef
 889 lp_build_minify(struct lp_build_sample_context *bld,
 890                 LLVMValueRef base_size,
 891                 LLVMValueRef level)
 892 {
 893    LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify");
 894    size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
 895    return size;
 896 }
 897
 898
 899 /**
 900  * Generate code to compute texture level of detail (lambda).
 901  * \param ddx  partial derivatives of (s, t, r, q) with respect to X
 902  * \param ddy  partial derivatives of (s, t, r, q) with respect to Y
 903  * \param lod_bias  optional float vector with the shader lod bias
 904  * \param explicit_lod  optional float vector with the explicit lod
 905  * \param width  scalar int texture width
 906  * \param height  scalar int texture height
 907  * \param depth  scalar int texture depth
 908  *
 909  * XXX: The resulting lod is scalar, so ignore all but the first element of
 910  * derivatives, lod_bias, etc that are passed by the shader.
 911  */
 912 static LLVMValueRef
 913 lp_build_lod_selector(struct lp_build_sample_context *bld,
 914                       const LLVMValueRef ddx[4],
 915                       const LLVMValueRef ddy[4],
 916                       LLVMValueRef lod_bias, /* optional */
 917                       LLVMValueRef explicit_lod, /* optional */
 918                       LLVMValueRef width,
 919                       LLVMValueRef height,
 920                       LLVMValueRef depth)
 921
 922 {
 923    if (bld->static_state->min_lod == bld->static_state->max_lod) {
 924       /* User is forcing sampling from a particular mipmap level.
 925        * This is hit during mipmap generation.
 926        */
 927       return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
 928    }
 929    else {
 930       struct lp_build_context *float_bld = &bld->float_bld;
 931       LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(),
 932                                                     bld->static_state->lod_bias);
 933       LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(),
 934                                            bld->static_state->min_lod);
 935       LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
 936                                            bld->static_state->max_lod);
 937       LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
 938       LLVMValueRef lod;
 939
 940       if (explicit_lod) {
 941          lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
 942                                        index0, "");
 943       }
 944       else {
 945          const int dims = texture_dims(bld->static_state->target);
 946          LLVMValueRef dsdx, dsdy;
 947          LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
 948          LLVMValueRef rho;
 949
 950          dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
 951          dsdx = lp_build_abs(float_bld, dsdx);
 952          dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
 953          dsdy = lp_build_abs(float_bld, dsdy);
 954          if (dims > 1) {
 955             dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
 956             dtdx = lp_build_abs(float_bld, dtdx);
 957             dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
 958             dtdy = lp_build_abs(float_bld, dtdy);
 959             if (dims > 2) {
 960                drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
 961                drdx = lp_build_abs(float_bld, drdx);
 962                drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
 963                drdy = lp_build_abs(float_bld, drdy);
 964             }
 965          }
 966
 967          /* Compute rho = max of all partial derivatives scaled by texture size.
 968           * XXX this could be vectorized somewhat
 969           */
 970          rho = LLVMBuildFMul(bld->builder,
 971                             lp_build_max(float_bld, dsdx, dsdy),
 972                             lp_build_int_to_float(float_bld, width), "");
 973          if (dims > 1) {
 974             LLVMValueRef max;
 975             max = LLVMBuildFMul(bld->builder,
 976                                lp_build_max(float_bld, dtdx, dtdy),
 977                                lp_build_int_to_float(float_bld, height), "");
 978             rho = lp_build_max(float_bld, rho, max);
 979             if (dims > 2) {
 980                max = LLVMBuildFMul(bld->builder,
 981                                   lp_build_max(float_bld, drdx, drdy),
 982                                   lp_build_int_to_float(float_bld, depth), "");
 983                rho = lp_build_max(float_bld, rho, max);
 984             }
 985          }
 986
 987          /* compute lod = log2(rho) */
 988          lod = lp_build_log2(float_bld, rho);
 989
 990          /* add shader lod bias */
 991          if (lod_bias) {
 992             lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias,
 993                                                index0, "");
 994             lod = LLVMBuildFAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
 995          }
 996       }
 997
 998       /* add sampler lod bias */
 999       lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
1000
1001       /* clamp lod */
1002       lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
1003
1004       return lod;
1005    }
1006 }
1007
1008
1009 /**
1010  * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
1011  * mipmap level index.
1012  * Note: this is all scalar code.
1013  * \param lod  scalar float texture level of detail
1014  * \param level_out  returns integer
1015  */
1016 static void
1017 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
1018                            unsigned unit,
1019                            LLVMValueRef lod,
1020                            LLVMValueRef *level_out)
1021 {
1022    struct lp_build_context *float_bld = &bld->float_bld;
1023    struct lp_build_context *int_bld = &bld->int_bld;
1024    LLVMValueRef last_level, level;
1025
1026    LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
1027
1028    last_level = bld->dynamic_state->last_level(bld->dynamic_state,
1029                                                bld->builder, unit);
1030
1031    /* convert float lod to integer */
1032    level = lp_build_iround(float_bld, lod);
1033
1034    /* clamp level to legal range of levels */
1035    *level_out = lp_build_clamp(int_bld, level, zero, last_level);
1036 }
1037
1038
1039 /**
1040  * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
1041  * two (adjacent) mipmap level indexes.  Later, we'll sample from those
1042  * two mipmap levels and interpolate between them.
1043  */
1044 static void
1045 lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
1046                            unsigned unit,
1047                            LLVMValueRef lod,
1048                            LLVMValueRef *level0_out,
1049                            LLVMValueRef *level1_out,
1050                            LLVMValueRef *weight_out)
1051 {
1052    struct lp_build_context *float_bld = &bld->float_bld;
1053    struct lp_build_context *int_bld = &bld->int_bld;
1054    LLVMValueRef last_level, level;
1055
1056    last_level = bld->dynamic_state->last_level(bld->dynamic_state,
1057                                                bld->builder, unit);
1058
1059    /* convert float lod to integer */
1060    level = lp_build_ifloor(float_bld, lod);
1061
1062    /* compute level 0 and clamp to legal range of levels */
1063    *level0_out = lp_build_clamp(int_bld, level,
1064                                 int_bld->zero,
1065                                 last_level);
1066    /* compute level 1 and clamp to legal range of levels */
1067    level = lp_build_add(int_bld, level, int_bld->one);
1068    *level1_out = lp_build_clamp(int_bld, level,
1069                                 int_bld->zero,
1070                                 last_level);
1071
1072    *weight_out = lp_build_fract(float_bld, lod);
1073 }
1074
1075
1076 /**
1077  * Generate code to sample a mipmap level with nearest filtering.
1078  * If sampling a cube texture, r = cube face in [0,5].
1079  */
1080 static void
1081 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
1082                               LLVMValueRef width_vec,
1083                               LLVMValueRef height_vec,
1084                               LLVMValueRef depth_vec,
1085                               LLVMValueRef row_stride_vec,
1086                               LLVMValueRef img_stride_vec,
1087                               LLVMValueRef data_ptr,
1088                               LLVMValueRef s,
1089                               LLVMValueRef t,
1090                               LLVMValueRef r,
1091                               LLVMValueRef colors_out[4])
1092 {
1093    const int dims = texture_dims(bld->static_state->target);
1094    LLVMValueRef x, y, z;
1095
1096    /*
1097     * Compute integer texcoords.
1098     */
1099    x = lp_build_sample_wrap_nearest(bld, s, width_vec,
1100                                     bld->static_state->pot_width,
1101                                     bld->static_state->wrap_s);
1102    lp_build_name(x, "tex.x.wrapped");
1103
1104    if (dims >= 2) {
1105       y = lp_build_sample_wrap_nearest(bld, t, height_vec,
1106                                        bld->static_state->pot_height,
1107                                        bld->static_state->wrap_t);
1108       lp_build_name(y, "tex.y.wrapped");
1109
1110       if (dims == 3) {
1111          z = lp_build_sample_wrap_nearest(bld, r, depth_vec,
1112                                           bld->static_state->pot_height,
1113                                           bld->static_state->wrap_r);
1114          lp_build_name(z, "tex.z.wrapped");
1115       }
1116       else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1117          z = r;
1118       }
1119       else {
1120          z = NULL;
1121       }
1122    }
1123    else {
1124       y = z = NULL;
1125    }
1126
1127    /*
1128     * Get texture colors.
1129     */
1130    lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1131                              x, y, z,
1132                              row_stride_vec, img_stride_vec,
1133                              data_ptr, colors_out);
1134 }
1135
1136
1137 /**
1138  * Generate code to sample a mipmap level with linear filtering.
1139  * If sampling a cube texture, r = cube face in [0,5].
1140  */
1141 static void
1142 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
1143                              LLVMValueRef width_vec,
1144                              LLVMValueRef height_vec,
1145                              LLVMValueRef depth_vec,
1146                              LLVMValueRef row_stride_vec,
1147                              LLVMValueRef img_stride_vec,
1148                              LLVMValueRef data_ptr,
1149                              LLVMValueRef s,
1150                              LLVMValueRef t,
1151                              LLVMValueRef r,
1152                              LLVMValueRef colors_out[4])
1153 {
1154    const int dims = texture_dims(bld->static_state->target);
1155    LLVMValueRef x0, y0, z0, x1, y1, z1;
1156    LLVMValueRef s_fpart, t_fpart, r_fpart;
1157    LLVMValueRef neighbors[2][2][4];
1158    int chan;
1159
1160    /*
1161     * Compute integer texcoords.
1162     */
1163    lp_build_sample_wrap_linear(bld, s, width_vec,
1164                                bld->static_state->pot_width,
1165                                bld->static_state->wrap_s,
1166                                &x0, &x1, &s_fpart);
1167    lp_build_name(x0, "tex.x0.wrapped");
1168    lp_build_name(x1, "tex.x1.wrapped");
1169
1170    if (dims >= 2) {
1171       lp_build_sample_wrap_linear(bld, t, height_vec,
1172                                   bld->static_state->pot_height,
1173                                   bld->static_state->wrap_t,
1174                                   &y0, &y1, &t_fpart);
1175       lp_build_name(y0, "tex.y0.wrapped");
1176       lp_build_name(y1, "tex.y1.wrapped");
1177
1178       if (dims == 3) {
1179          lp_build_sample_wrap_linear(bld, r, depth_vec,
1180                                      bld->static_state->pot_depth,
1181                                      bld->static_state->wrap_r,
1182                                      &z0, &z1, &r_fpart);
1183          lp_build_name(z0, "tex.z0.wrapped");
1184          lp_build_name(z1, "tex.z1.wrapped");
1185       }
1186       else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1187          z0 = z1 = r;  /* cube face */
1188          r_fpart = NULL;
1189       }
1190       else {
1191          z0 = z1 = NULL;
1192          r_fpart = NULL;
1193       }
1194    }
1195    else {
1196       y0 = y1 = t_fpart = NULL;
1197       z0 = z1 = r_fpart = NULL;
1198    }
1199
1200    /*
1201     * Get texture colors.
1202     */
1203    /* get x0/x1 texels */
1204    lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1205                              x0, y0, z0,
1206                              row_stride_vec, img_stride_vec,
1207                              data_ptr, neighbors[0][0]);
1208    lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1209                              x1, y0, z0,
1210                              row_stride_vec, img_stride_vec,
1211                              data_ptr, neighbors[0][1]);
1212
1213    if (dims == 1) {
1214       /* Interpolate two samples from 1D image to produce one color */
1215       for (chan = 0; chan < 4; chan++) {
1216          colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
1217                                           neighbors[0][0][chan],
1218                                           neighbors[0][1][chan]);
1219       }
1220    }
1221    else {
1222       /* 2D/3D texture */
1223       LLVMValueRef colors0[4];
1224
1225       /* get x0/x1 texels at y1 */
1226       lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1227                                 x0, y1, z0,
1228                                 row_stride_vec, img_stride_vec,
1229                                 data_ptr, neighbors[1][0]);
1230       lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1231                                 x1, y1, z0,
1232                                 row_stride_vec, img_stride_vec,
1233                                 data_ptr, neighbors[1][1]);
1234
1235       /* Bilinear interpolate the four samples from the 2D image / 3D slice */
1236       for (chan = 0; chan < 4; chan++) {
1237          colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
1238                                           s_fpart, t_fpart,
1239                                           neighbors[0][0][chan],
1240                                           neighbors[0][1][chan],
1241                                           neighbors[1][0][chan],
1242                                           neighbors[1][1][chan]);
1243       }
1244
1245       if (dims == 3) {
1246          LLVMValueRef neighbors1[2][2][4];
1247          LLVMValueRef colors1[4];
1248
1249          /* get x0/x1/y0/y1 texels at z1 */
1250          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1251                                    x0, y0, z1,
1252                                    row_stride_vec, img_stride_vec,
1253                                    data_ptr, neighbors1[0][0]);
1254          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1255                                    x1, y0, z1,
1256                                    row_stride_vec, img_stride_vec,
1257                                    data_ptr, neighbors1[0][1]);
1258          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1259                                    x0, y1, z1,
1260                                    row_stride_vec, img_stride_vec,
1261                                    data_ptr, neighbors1[1][0]);
1262          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1263                                    x1, y1, z1,
1264                                    row_stride_vec, img_stride_vec,
1265                                    data_ptr, neighbors1[1][1]);
1266
1267          /* Bilinear interpolate the four samples from the second Z slice */
1268          for (chan = 0; chan < 4; chan++) {
1269             colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
1270                                              s_fpart, t_fpart,
1271                                              neighbors1[0][0][chan],
1272                                              neighbors1[0][1][chan],
1273                                              neighbors1[1][0][chan],
1274                                              neighbors1[1][1][chan]);
1275          }
1276
1277          /* Linearly interpolate the two samples from the two 3D slices */
1278          for (chan = 0; chan < 4; chan++) {
1279             colors_out[chan] = lp_build_lerp(&bld->texel_bld,
1280                                              r_fpart,
1281                                              colors0[chan], colors1[chan]);
1282          }
1283       }
1284       else {
1285          /* 2D tex */
1286          for (chan = 0; chan < 4; chan++) {
1287             colors_out[chan] = colors0[chan];
1288          }
1289       }
1290    }
1291 }
1292
1293
1294 /** Helper used by lp_build_cube_lookup() */
1295 static LLVMValueRef
1296 lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
1297 {
1298    /* ima = -0.5 / abs(coord); */
1299    LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5);
1300    LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
1301    LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord);
1302    return ima;
1303 }
1304
1305
1306 /**
1307  * Helper used by lp_build_cube_lookup()
1308  * \param sign  scalar +1 or -1
1309  * \param coord  float vector
1310  * \param ima  float vector
1311  */
1312 static LLVMValueRef
1313 lp_build_cube_coord(struct lp_build_context *coord_bld,
1314                     LLVMValueRef sign, int negate_coord,
1315                     LLVMValueRef coord, LLVMValueRef ima)
1316 {
1317    /* return negate(coord) * ima * sign + 0.5; */
1318    LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
1319    LLVMValueRef res;
1320
1321    assert(negate_coord == +1 || negate_coord == -1);
1322
1323    if (negate_coord == -1) {
1324       coord = lp_build_negate(coord_bld, coord);
1325    }
1326
1327    res = lp_build_mul(coord_bld, coord, ima);
1328    if (sign) {
1329       sign = lp_build_broadcast_scalar(coord_bld, sign);
1330       res = lp_build_mul(coord_bld, res, sign);
1331    }
1332    res = lp_build_add(coord_bld, res, half);
1333
1334    return res;
1335 }
1336
1337
1338 /** Helper used by lp_build_cube_lookup()
1339  * Return (major_coord >= 0) ? pos_face : neg_face;
1340  */
1341 static LLVMValueRef
1342 lp_build_cube_face(struct lp_build_sample_context *bld,
1343                    LLVMValueRef major_coord,
1344                    unsigned pos_face, unsigned neg_face)
1345 {
1346    LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
1347                                     major_coord,
1348                                     bld->float_bld.zero, "");
1349    LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0);
1350    LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0);
1351    LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, "");
1352    return res;
1353 }
1354
1355
1356
/**
 * Generate code to do cube face selection and compute per-face texcoords.
 *
 * One face is chosen for the whole coordinate vector, based on the average
 * of its four elements: X if |rx| >= |ry| and |rx| >= |rz|, otherwise Y if
 * |ry| >= |rx| and |ry| >= |rz|, otherwise Z.
 *
 * \param s, t, r  incoming texcoord vectors (the cube direction)
 * \param face  receives the selected face index (PIPE_TEX_FACE_x)
 * \param face_s  receives the s coordinate vector within the face
 * \param face_t  receives the t coordinate vector within the face
 */
static void
lp_build_cube_lookup(struct lp_build_sample_context *bld,
                     LLVMValueRef s,
                     LLVMValueRef t,
                     LLVMValueRef r,
                     LLVMValueRef *face,
                     LLVMValueRef *face_s,
                     LLVMValueRef *face_t)
{
   struct lp_build_context *float_bld = &bld->float_bld;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   LLVMValueRef rx, ry, rz;
   LLVMValueRef arx, ary, arz;
   LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25);
   LLVMValueRef arx_ge_ary, arx_ge_arz;
   LLVMValueRef ary_ge_arx, ary_ge_arz;
   LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;
   LLVMValueRef rx_pos, ry_pos, rz_pos;

   /* the 0.25 averaging factor below is only right for 4-wide vectors */
   assert(bld->coord_bld.type.length == 4);

   /*
    * Use the average of the four pixel's texcoords to choose the face.
    */
   rx = lp_build_mul(float_bld, c25,
                     lp_build_sum_vector(&bld->coord_bld, s));
   ry = lp_build_mul(float_bld, c25,
                     lp_build_sum_vector(&bld->coord_bld, t));
   rz = lp_build_mul(float_bld, c25,
                     lp_build_sum_vector(&bld->coord_bld, r));

   arx = lp_build_abs(float_bld, rx);
   ary = lp_build_abs(float_bld, ry);
   arz = lp_build_abs(float_bld, rz);

   /*
    * Compare sign/magnitude of rx,ry,rz to determine face
    */
   arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, "");
   arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, "");
   ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, "");
   ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, "");

   arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, "");
   /* NOTE(review): this value is rebuilt inside the else branch below
    * before its only use, so this first computation is redundant.
    */
   ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");

   /* NOTE(review): rx_pos, ry_pos and rz_pos are never read in this
    * function; the sign test is redone by lp_build_cube_face().
    */
   rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, "");
   ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, "");
   rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, "");

   {
      struct lp_build_flow_context *flow_ctx;
      struct lp_build_if_state if_ctx;

      flow_ctx = lp_build_flow_create(bld->builder);
      lp_build_flow_scope_begin(flow_ctx);

      /* start the outputs as undef; every branch below assigns all three */
      *face_s = bld->coord_bld.undef;
      *face_t = bld->coord_bld.undef;
      *face = bld->int_bld.undef;

      lp_build_name(*face_s, "face_s");
      lp_build_name(*face_t, "face_t");
      lp_build_name(*face, "face");

      /* presumably registers the outputs so that the values assigned in
       * the two branches get merged at the endif -- confirm against the
       * flow helpers
       */
      lp_build_flow_scope_declare(flow_ctx, face_s);
      lp_build_flow_scope_declare(flow_ctx, face_t);
      lp_build_flow_scope_declare(flow_ctx, face);

      lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
      {
         /* +/- X face */
         LLVMValueRef sign = lp_build_sgn(float_bld, rx);
         LLVMValueRef ima = lp_build_cube_ima(coord_bld, s);
         *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
         *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
         *face = lp_build_cube_face(bld, rx,
                                    PIPE_TEX_FACE_POS_X,
                                    PIPE_TEX_FACE_NEG_X);
      }
      lp_build_else(&if_ctx);
      {
         /* Y-or-Z decision, made with a second, nested flow context and
          * its own set of temporaries.
          */
         struct lp_build_flow_context *flow_ctx2;
         struct lp_build_if_state if_ctx2;

         LLVMValueRef face_s2 = bld->coord_bld.undef;
         LLVMValueRef face_t2 = bld->coord_bld.undef;
         LLVMValueRef face2 = bld->int_bld.undef;

         flow_ctx2 = lp_build_flow_create(bld->builder);
         lp_build_flow_scope_begin(flow_ctx2);
         lp_build_flow_scope_declare(flow_ctx2, &face_s2);
         lp_build_flow_scope_declare(flow_ctx2, &face_t2);
         lp_build_flow_scope_declare(flow_ctx2, &face2);

         ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");

         lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
         {
            /* +/- Y face */
            LLVMValueRef sign = lp_build_sgn(float_bld, ry);
            LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
            face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
            face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
            face2 = lp_build_cube_face(bld, ry,
                                       PIPE_TEX_FACE_POS_Y,
                                       PIPE_TEX_FACE_NEG_Y);
         }
         lp_build_else(&if_ctx2);
         {
            /* +/- Z face */
            LLVMValueRef sign = lp_build_sgn(float_bld, rz);
            LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
            face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
            face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
            face2 = lp_build_cube_face(bld, rz,
                                       PIPE_TEX_FACE_POS_Z,
                                       PIPE_TEX_FACE_NEG_Z);
         }
         lp_build_endif(&if_ctx2);
         lp_build_flow_scope_end(flow_ctx2);
         lp_build_flow_destroy(flow_ctx2);
         /* hand the inner scope's results to the outer scope's outputs */
         *face_s = face_s2;
         *face_t = face_t2;
         *face = face2;
      }

      lp_build_endif(&if_ctx);
      lp_build_flow_scope_end(flow_ctx);
      lp_build_flow_destroy(flow_ctx);
   }
}
1492
1493
1494
1495 /**
1496  * Sample the texture/mipmap using given image filter and mip filter.
1497  * data0_ptr and data1_ptr point to the two mipmap levels to sample
1498  * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
1499  * If we're using nearest miplevel sampling the '1' values will be null/unused.
1500  */
1501 static void
1502 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1503                        unsigned img_filter,
1504                        unsigned mip_filter,
1505                        LLVMValueRef s,
1506                        LLVMValueRef t,
1507                        LLVMValueRef r,
1508                        LLVMValueRef lod_fpart,
1509                        LLVMValueRef width0_vec,
1510                        LLVMValueRef width1_vec,
1511                        LLVMValueRef height0_vec,
1512                        LLVMValueRef height1_vec,
1513                        LLVMValueRef depth0_vec,
1514                        LLVMValueRef depth1_vec,
1515                        LLVMValueRef row_stride0_vec,
1516                        LLVMValueRef row_stride1_vec,
1517                        LLVMValueRef img_stride0_vec,
1518                        LLVMValueRef img_stride1_vec,
1519                        LLVMValueRef data_ptr0,
1520                        LLVMValueRef data_ptr1,
1521                        LLVMValueRef *colors_out)
1522 {
1523    LLVMValueRef colors0[4], colors1[4];
1524    int chan;
1525
1526    if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1527       /* sample the first mipmap level */
1528       lp_build_sample_image_nearest(bld,
1529                                     width0_vec, height0_vec, depth0_vec,
1530                                     row_stride0_vec, img_stride0_vec,
1531                                     data_ptr0, s, t, r, colors0);
1532
1533       if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1534          /* sample the second mipmap level */
1535          lp_build_sample_image_nearest(bld,
1536                                        width1_vec, height1_vec, depth1_vec,
1537                                        row_stride1_vec, img_stride1_vec,
1538                                        data_ptr1, s, t, r, colors1);
1539       }
1540    }
1541    else {
1542       assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1543
1544       /* sample the first mipmap level */
1545       lp_build_sample_image_linear(bld,
1546                                    width0_vec, height0_vec, depth0_vec,
1547                                    row_stride0_vec, img_stride0_vec,
1548                                    data_ptr0, s, t, r, colors0);
1549
1550       if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1551          /* sample the second mipmap level */
1552          lp_build_sample_image_linear(bld,
1553                                       width1_vec, height1_vec, depth1_vec,
1554                                       row_stride1_vec, img_stride1_vec,
1555                                       data_ptr1, s, t, r, colors1);
1556       }
1557    }
1558
1559    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1560       /* interpolate samples from the two mipmap levels */
1561       for (chan = 0; chan < 4; chan++) {
1562          colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1563                                           colors0[chan], colors1[chan]);
1564       }
1565    }
1566    else {
1567       /* use first/only level's colors */
1568       for (chan = 0; chan < 4; chan++) {
1569          colors_out[chan] = colors0[chan];
1570       }
1571    }
1572 }
1573
1574
1575
1576 /**
1577  * General texture sampling codegen.
1578  * This function handles texture sampling for all texture targets (1D,
1579  * 2D, 3D, cube) and all filtering modes.
1580  */
1581 static void
1582 lp_build_sample_general(struct lp_build_sample_context *bld,
1583                         unsigned unit,
1584                         LLVMValueRef s,
1585                         LLVMValueRef t,
1586                         LLVMValueRef r,
1587                         const LLVMValueRef *ddx,
1588                         const LLVMValueRef *ddy,
1589                         LLVMValueRef lod_bias, /* optional */
1590                         LLVMValueRef explicit_lod, /* optional */
1591                         LLVMValueRef width,
1592                         LLVMValueRef height,
1593                         LLVMValueRef depth,
1594                         LLVMValueRef width_vec,
1595                         LLVMValueRef height_vec,
1596                         LLVMValueRef depth_vec,
1597                         LLVMValueRef row_stride_array,
1598                         LLVMValueRef img_stride_array,
1599                         LLVMValueRef data_array,
1600                         LLVMValueRef *colors_out)
1601 {
1602    struct lp_build_context *float_bld = &bld->float_bld;
1603    const unsigned mip_filter = bld->static_state->min_mip_filter;
1604    const unsigned min_filter = bld->static_state->min_img_filter;
1605    const unsigned mag_filter = bld->static_state->mag_img_filter;
1606    const int dims = texture_dims(bld->static_state->target);
1607    LLVMValueRef lod = NULL, lod_fpart = NULL;
1608    LLVMValueRef ilevel0, ilevel1 = NULL, ilevel0_vec, ilevel1_vec = NULL;
1609    LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
1610    LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
1611    LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
1612    LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
1613    LLVMValueRef data_ptr0, data_ptr1 = NULL;
1614    LLVMValueRef face_ddx[4], face_ddy[4];
1615
1616    /*
1617    printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
1618           mip_filter, min_filter, mag_filter);
1619    */
1620
1621    /*
1622     * Choose cube face, recompute texcoords and derivatives for the chosen face.
1623     */
1624    if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1625       LLVMValueRef face, face_s, face_t;
1626       lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
1627       s = face_s; /* vec */
1628       t = face_t; /* vec */
1629       /* use 'r' to indicate cube face */
1630       r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
1631
1632       /* recompute ddx, ddy using the new (s,t) face texcoords */
1633       face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
1634       face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
1635       face_ddx[2] = NULL;
1636       face_ddx[3] = NULL;
1637       face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
1638       face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
1639       face_ddy[2] = NULL;
1640       face_ddy[3] = NULL;
1641       ddx = face_ddx;
1642       ddy = face_ddy;
1643    }
1644
1645    /*
1646     * Compute the level of detail (float).
1647     */
1648    if (min_filter != mag_filter ||
1649        mip_filter != PIPE_TEX_MIPFILTER_NONE) {
1650       /* Need to compute lod either to choose mipmap levels or to
1651        * distinguish between minification/magnification with one mipmap level.
1652        */
1653       lod = lp_build_lod_selector(bld, ddx, ddy,
1654                                   lod_bias, explicit_lod,
1655                                   width, height, depth);
1656    }
1657
1658    /*
1659     * Compute integer mipmap level(s) to fetch texels from.
1660     */
1661    if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
1662       /* always use mip level 0 */
1663       if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1664          /* XXX this is a work-around for an apparent bug in LLVM 2.7.
1665           * We should be able to set ilevel0 = const(0) but that causes
1666           * bad x86 code to be emitted.
1667           */
1668          lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
1669          lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
1670       }
1671       else {
1672          ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
1673       }
1674    }
1675    else {
1676       assert(lod);
1677       if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
1678          lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
1679       }
1680       else {
1681          assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR);
1682          lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
1683                                     &lod_fpart);
1684          lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart);
1685       }
1686    }
1687
1688    /*
1689     * Convert scalar integer mipmap levels into vectors.
1690     */
1691    ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
1692    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
1693       ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
1694
1695    /*
1696     * Compute width, height at mipmap level 'ilevel0'
1697     */
1698    width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
1699    if (dims >= 2) {
1700       height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
1701       row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
1702                                                       ilevel0);
1703       if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
1704          img_stride0_vec = lp_build_get_level_stride_vec(bld,
1705                                                          img_stride_array,
1706                                                          ilevel0);
1707          if (dims == 3) {
1708             depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
1709          }
1710       }
1711    }
1712    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1713       /* compute width, height, depth for second mipmap level at 'ilevel1' */
1714       width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
1715       if (dims >= 2) {
1716          height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
1717          row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
1718                                                          ilevel1);
1719          if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
1720             img_stride1_vec = lp_build_get_level_stride_vec(bld,
1721                                                             img_stride_array,
1722                                                             ilevel1);
1723             if (dims ==3) {
1724                depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
1725             }
1726          }
1727       }
1728    }
1729
1730    /*
1731     * Get pointer(s) to image data for mipmap level(s).
1732     */
1733    data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
1734    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1735       data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
1736    }
1737
1738    /*
1739     * Get/interpolate texture colors.
1740     */
1741    if (min_filter == mag_filter) {
1742       /* no need to distinquish between minification and magnification */
1743       lp_build_sample_mipmap(bld, min_filter, mip_filter, s, t, r, lod_fpart,
1744                              width0_vec, width1_vec,
1745                              height0_vec, height1_vec,
1746                              depth0_vec, depth1_vec,
1747                              row_stride0_vec, row_stride1_vec,
1748                              img_stride0_vec, img_stride1_vec,
1749                              data_ptr0, data_ptr1,
1750                              colors_out);
1751    }
1752    else {
1753       /* Emit conditional to choose min image filter or mag image filter
1754        * depending on the lod being >0 or <= 0, respectively.
1755        */
1756       struct lp_build_flow_context *flow_ctx;
1757       struct lp_build_if_state if_ctx;
1758       LLVMValueRef minify;
1759
1760       flow_ctx = lp_build_flow_create(bld->builder);
1761       lp_build_flow_scope_begin(flow_ctx);
1762
1763       lp_build_flow_scope_declare(flow_ctx, &colors_out[0]);
1764       lp_build_flow_scope_declare(flow_ctx, &colors_out[1]);
1765       lp_build_flow_scope_declare(flow_ctx, &colors_out[2]);
1766       lp_build_flow_scope_declare(flow_ctx, &colors_out[3]);
1767
1768       /* minify = lod > 0.0 */
1769       minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
1770                              lod, float_bld->zero, "");
1771
1772       lp_build_if(&if_ctx, flow_ctx, bld->builder, minify);
1773       {
1774          /* Use the minification filter */
1775          lp_build_sample_mipmap(bld, min_filter, mip_filter,
1776                                 s, t, r, lod_fpart,
1777                                 width0_vec, width1_vec,
1778                                 height0_vec, height1_vec,
1779                                 depth0_vec, depth1_vec,
1780                                 row_stride0_vec, row_stride1_vec,
1781                                 img_stride0_vec, img_stride1_vec,
1782                                 data_ptr0, data_ptr1,
1783                                 colors_out);
1784       }
1785       lp_build_else(&if_ctx);
1786       {
1787          /* Use the magnification filter */
1788          lp_build_sample_mipmap(bld, mag_filter, mip_filter,
1789                                 s, t, r, lod_fpart,
1790                                 width0_vec, width1_vec,
1791                                 height0_vec, height1_vec,
1792                                 depth0_vec, depth1_vec,
1793                                 row_stride0_vec, row_stride1_vec,
1794                                 img_stride0_vec, img_stride1_vec,
1795                                 data_ptr0, data_ptr1,
1796                                 colors_out);
1797       }
1798       lp_build_endif(&if_ctx);
1799
1800       lp_build_flow_scope_end(flow_ctx);
1801       lp_build_flow_destroy(flow_ctx);
1802    }
1803 }
1804
1805
1806
/**
 * Special-case sampling path: bilinear filtering of mipmap level 0 of a
 * 2D or RECT texture, done with 8.8 fixed-point arithmetic on packed
 * 8-bit texels (AoS) instead of per-channel floats.
 *
 * The asserts at the top of the body spell out the sampler state this
 * path requires: linear min and mag image filters and no mipmap filter.
 *
 * \param bld           sampling build context
 * \param s, t          texture coordinates (float vectors)
 * \param width, height texture size as integer vectors
 * \param stride_array  per-level row strides; only level 0 is read
 * \param data_array    per-level image data pointers; only level 0 is read
 * \param texel_out     returns the four SoA color channels, with the
 *                      sampler swizzle applied
 */
static void
lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
                              LLVMValueRef s,
                              LLVMValueRef t,
                              LLVMValueRef width,
                              LLVMValueRef height,
                              LLVMValueRef stride_array,
                              LLVMValueRef data_array,
                              LLVMValueRef texel_out[4])
{
   LLVMBuilderRef builder = bld->builder;
   struct lp_build_context i32, h16, u8n;
   LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
   LLVMValueRef i32_c8, i32_c128, i32_c255;
   LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
   LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
   LLVMValueRef data_ptr;
   LLVMValueRef x_stride, y_stride;
   LLVMValueRef x_offset0, x_offset1;
   LLVMValueRef y_offset0, y_offset1;
   LLVMValueRef offset[2][2];
   LLVMValueRef x_subcoord[2], y_subcoord[2];
   LLVMValueRef neighbors_lo[2][2];
   LLVMValueRef neighbors_hi[2][2];
   LLVMValueRef packed, packed_lo, packed_hi;
   LLVMValueRef unswizzled[4];
   const unsigned level = 0;
   unsigned i, j;

   assert(bld->static_state->target == PIPE_TEXTURE_2D
         || bld->static_state->target == PIPE_TEXTURE_RECT);
   assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR);
   assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR);
   assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE);

   /* build contexts for the integer / fixed-point types used below */
   lp_build_context_init(&i32, builder, lp_type_int_vec(32));
   lp_build_context_init(&h16, builder, lp_type_ufixed(16));
   lp_build_context_init(&u8n, builder, lp_type_unorm(8));

   i32_vec_type = lp_build_vec_type(i32.type);
   h16_vec_type = lp_build_vec_type(h16.type);
   u8n_vec_type = lp_build_vec_type(u8n.type);

   /* normalized coords are scaled by the texture size to get texel units */
   if (bld->static_state->normalized_coords) {
      LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
      LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width, coord_vec_type, "");
      LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height, coord_vec_type, "");
      s = lp_build_mul(&bld->coord_bld, s, fp_width);
      t = lp_build_mul(&bld->coord_bld, t, fp_height);
   }

   /* scale coords by 256 (8 fractional bits) */
   s = lp_build_mul_imm(&bld->coord_bld, s, 256);
   t = lp_build_mul_imm(&bld->coord_bld, t, 256);

   /* convert float to int */
   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
   t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");

   /* subtract 0.5 (add -128); note i32_c128 actually holds -128 */
   i32_c128 = lp_build_const_int_vec(i32.type, -128);
   s = LLVMBuildAdd(builder, s, i32_c128, "");
   t = LLVMBuildAdd(builder, t, i32_c128, "");

   /* compute floor (shift right 8) */
   i32_c8 = lp_build_const_int_vec(i32.type, 8);
   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");

   /* compute fractional part (AND with 0xff) */
   i32_c255 = lp_build_const_int_vec(i32.type, 255);
   s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
   t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");

   /* horizontal stride is the size of one format block in bytes */
   x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
                                 bld->format_desc->block.bits/8);

   /* vertical stride is the row stride of mipmap level 0 */
   y_stride = lp_build_get_const_level_stride_vec(bld, stride_array, level);

   /*
    * Apply the wrap modes to the integer coords, yielding for each axis the
    * offsets of the two neighboring texels and their sub-block coordinates.
    */
   lp_build_sample_wrap_linear_int(bld,
                                   bld->format_desc->block.width,
                                   s_ipart, width, x_stride,
                                   bld->static_state->pot_width,
                                   bld->static_state->wrap_s,
                                   &x_offset0, &x_offset1,
                                   &x_subcoord[0], &x_subcoord[1]);
   lp_build_sample_wrap_linear_int(bld,
                                   bld->format_desc->block.height,
                                   t_ipart, height, y_stride,
                                   bld->static_state->pot_height,
                                   bld->static_state->wrap_t,
                                   &y_offset0, &y_offset1,
                                   &y_subcoord[0], &y_subcoord[1]);

   /* offset[j][i]: row j (y), column i (x) of the 2x2 texel neighborhood */
   offset[0][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset0);
   offset[0][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset0);
   offset[1][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset1);
   offset[1][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset1);

   /*
    * Transform 4 x i32 in
    *
    *   s_fpart = {s0, s1, s2, s3}
    *
    * into 8 x i16
    *
    *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
    *
    * into two 8 x i16
    *
    *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
    *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
    *
    * and likewise for t_fpart. There is no risk of losing precision here
    * since the fractional parts only use the lower 8 bits.
    */

   s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
   t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");

   {
      LLVMTypeRef elem_type = LLVMInt32Type();
      LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
      LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
      LLVMValueRef shuffle_lo;
      LLVMValueRef shuffle_hi;

      for(j = 0; j < h16.type.length; j += 4) {
         /* which 16-bit half of each 32-bit element holds the low 8 bits */
#ifdef PIPE_ARCH_LITTLE_ENDIAN
         unsigned subindex = 0;
#else
         unsigned subindex = 1;
#endif
         LLVMValueRef index;

         /* replicate one fraction four times, once per color channel */
         index = LLVMConstInt(elem_type, j/2 + subindex, 0);
         for(i = 0; i < 4; ++i)
            shuffles_lo[j + i] = index;

         index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
         for(i = 0; i < 4; ++i)
            shuffles_hi[j + i] = index;
      }

      shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
      shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);

      s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, "");
      t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, "");
      s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, "");
      t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
   }

   /*
    * get pointer to mipmap level 0 data
    */
   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, level);

   /*
    * Fetch the pixels as 4 x 32bit (rgba order might differ):
    *
    *   rgba0 rgba1 rgba2 rgba3
    *
    * bit cast them into 16 x u8
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    *
    * unpack them into two 8 x i16:
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1
    *   r2 g2 b2 a2 r3 g3 b3 a3
    *
    * The higher 8 bits of the resulting elements will be zero.
    */

   for (j = 0; j < 2; ++j) {
      for (i = 0; i < 2; ++i) {
         LLVMValueRef rgba8;

         if (util_format_is_rgba8_variant(bld->format_desc)) {
            /*
             * Given the format is a rgba8, just read the pixels as is,
             * without any swizzling. Swizzling will be done later.
             */
            rgba8 = lp_build_gather(bld->builder,
                                    bld->texel_type.length,
                                    bld->format_desc->block.bits,
                                    bld->texel_type.width,
                                    data_ptr, offset[j][i]);

            rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");

         }
         else {
            /* other formats go through the generic AoS fetch */
            rgba8 = lp_build_fetch_rgba_aos(bld->builder,
                                            bld->format_desc,
                                            u8n.type,
                                            data_ptr, offset[j][i],
                                            x_subcoord[i],
                                            y_subcoord[j]);
         }

         lp_build_unpack2(builder, u8n.type, h16.type,
                          rgba8,
                          &neighbors_lo[j][i], &neighbors_hi[j][i]);
      }
   }

   /*
    * Linear interpolate with 8.8 fixed point.
    */

   packed_lo = lp_build_lerp_2d(&h16,
                                s_fpart_lo, t_fpart_lo,
                                neighbors_lo[0][0],
                                neighbors_lo[0][1],
                                neighbors_lo[1][0],
                                neighbors_lo[1][1]);

   packed_hi = lp_build_lerp_2d(&h16,
                                s_fpart_hi, t_fpart_hi,
                                neighbors_hi[0][0],
                                neighbors_hi[0][1],
                                neighbors_hi[1][0],
                                neighbors_hi[1][1]);

   /* narrow the two 16-bit halves back into one vector of 8-bit values */
   packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi);

   /*
    * Convert to SoA and swizzle.
    */

   lp_build_rgba8_to_f32_soa(bld->builder,
                             bld->texel_type,
                             packed, unswizzled);

   /* only the raw rgba8 gather above skipped the format swizzle */
   if (util_format_is_rgba8_variant(bld->format_desc)) {
      lp_build_format_swizzle_soa(bld->format_desc,
                                  &bld->texel_bld,
                                  unswizzled, texel_out);
   } else {
      texel_out[0] = unswizzled[0];
      texel_out[1] = unswizzled[1];
      texel_out[2] = unswizzled[2];
      texel_out[3] = unswizzled[3];
   }

   apply_sampler_swizzle(bld, texel_out);
}
2056
2057
2058 static void
2059 lp_build_sample_compare(struct lp_build_sample_context *bld,
2060                         LLVMValueRef p,
2061                         LLVMValueRef texel[4])
2062 {
2063    struct lp_build_context *texel_bld = &bld->texel_bld;
2064    LLVMValueRef res;
2065    unsigned chan;
2066
2067    if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE)
2068       return;
2069
2070    /* TODO: Compare before swizzling, to avoid redundant computations */
2071    res = NULL;
2072    for(chan = 0; chan < 4; ++chan) {
2073       LLVMValueRef cmp;
2074       cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]);
2075       cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero);
2076
2077       if(res)
2078          res = lp_build_add(texel_bld, res, cmp);
2079       else
2080          res = cmp;
2081    }
2082
2083    assert(res);
2084    res = lp_build_mul(texel_bld, res, lp_build_const_vec(texel_bld->type, 0.25));
2085
2086    /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
2087    for(chan = 0; chan < 3; ++chan)
2088       texel[chan] = res;
2089    texel[3] = texel_bld->one;
2090 }
2091
2092
2093 /**
2094  * Just set texels to white instead of actually sampling the texture.
2095  * For debugging.
2096  */
2097 static void
2098 lp_build_sample_nop(struct lp_build_sample_context *bld,
2099                     LLVMValueRef texel_out[4])
2100 {
2101    struct lp_build_context *texel_bld = &bld->texel_bld;
2102    unsigned chan;
2103
2104    for (chan = 0; chan < 4; chan++) {
2105       /*lp_bld_mov(texel_bld, texel, texel_bld->one);*/
2106       texel_out[chan] = texel_bld->one;
2107    }
2108 }
2109
2110
2111 /**
2112  * Build texture sampling code.
2113  * 'texel' will return a vector of four LLVMValueRefs corresponding to
2114  * R, G, B, A.
2115  * \param type  vector float type to use for coords, etc.
2116  * \param ddx  partial derivatives of (s,t,r,q) with respect to x
2117  * \param ddy  partial derivatives of (s,t,r,q) with respect to y
2118  */
2119 void
2120 lp_build_sample_soa(LLVMBuilderRef builder,
2121                     const struct lp_sampler_static_state *static_state,
2122                     struct lp_sampler_dynamic_state *dynamic_state,
2123                     struct lp_type type,
2124                     unsigned unit,
2125                     unsigned num_coords,
2126                     const LLVMValueRef *coords,
2127                     const LLVMValueRef ddx[4],
2128                     const LLVMValueRef ddy[4],
2129                     LLVMValueRef lod_bias, /* optional */
2130                     LLVMValueRef explicit_lod, /* optional */
2131                     LLVMValueRef texel_out[4])
2132 {
2133    struct lp_build_sample_context bld;
2134    LLVMValueRef width, width_vec;
2135    LLVMValueRef height, height_vec;
2136    LLVMValueRef depth, depth_vec;
2137    LLVMValueRef row_stride_array, img_stride_array;
2138    LLVMValueRef data_array;
2139    LLVMValueRef s;
2140    LLVMValueRef t;
2141    LLVMValueRef r;
2142
2143    if (0) {
2144       enum pipe_format fmt = static_state->format;
2145       debug_printf("Sample from %s\n", util_format_name(fmt));
2146    }
2147
2148    assert(type.floating);
2149
2150    /* Setup our build context */
2151    memset(&bld, 0, sizeof bld);
2152    bld.builder = builder;
2153    bld.static_state = static_state;
2154    bld.dynamic_state = dynamic_state;
2155    bld.format_desc = util_format_description(static_state->format);
2156
2157    bld.float_type = lp_type_float(32);
2158    bld.int_type = lp_type_int(32);
2159    bld.coord_type = type;
2160    bld.uint_coord_type = lp_uint_type(type);
2161    bld.int_coord_type = lp_int_type(type);
2162    bld.texel_type = type;
2163
2164    lp_build_context_init(&bld.float_bld, builder, bld.float_type);
2165    lp_build_context_init(&bld.int_bld, builder, bld.int_type);
2166    lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
2167    lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
2168    lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
2169    lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
2170
2171    /* Get the dynamic state */
2172    width = dynamic_state->width(dynamic_state, builder, unit);
2173    height = dynamic_state->height(dynamic_state, builder, unit);
2174    depth = dynamic_state->depth(dynamic_state, builder, unit);
2175    row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
2176    img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit);
2177    data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
2178    /* Note that data_array is an array[level] of pointers to texture images */
2179
2180    s = coords[0];
2181    t = coords[1];
2182    r = coords[2];
2183
2184    width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
2185    height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
2186    depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
2187
2188    if (0) {
2189       /* For debug: no-op texture sampling */
2190       lp_build_sample_nop(&bld, texel_out);
2191    }
2192    else if (util_format_fits_8unorm(bld.format_desc) &&
2193             bld.format_desc->nr_channels > 1 &&
2194             (static_state->target == PIPE_TEXTURE_2D ||
2195                   static_state->target == PIPE_TEXTURE_RECT) &&
2196             static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
2197             static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
2198             static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
2199             is_simple_wrap_mode(static_state->wrap_s) &&
2200             is_simple_wrap_mode(static_state->wrap_t)) {
2201       /* special case */
2202       lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
2203                                     row_stride_array, data_array, texel_out);
2204    }
2205    else {
2206       lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy,
2207                               lod_bias, explicit_lod,
2208                               width, height, depth,
2209                               width_vec, height_vec, depth_vec,
2210                               row_stride_array, img_stride_array,
2211                               data_array,
2212                               texel_out);
2213    }
2214
2215    lp_build_sample_compare(&bld, r, texel_out);
2216 }