src/gallium/auxiliary/gallivm/lp_bld_sample.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * Texture sampling -- common code.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  */
  34
  35 #include "pipe/p_defines.h"
  36 #include "pipe/p_state.h"
  37 #include "util/u_format.h"
  38 #include "util/u_math.h"
  39 #include "lp_bld_arit.h"
  40 #include "lp_bld_const.h"
  41 #include "lp_bld_debug.h"
  42 #include "lp_bld_printf.h"
  43 #include "lp_bld_flow.h"
  44 #include "lp_bld_sample.h"
  45 #include "lp_bld_swizzle.h"
  46 #include "lp_bld_type.h"
  47
  48
/*
 * Bri-linear factor: the steepness passed as 'factor' to
 * lp_build_brilinear_lod() and lp_build_brilinear_rho().
 * Should be greater than one.
 */
  52 #define BRILINEAR_FACTOR 2
  53
  54 /**
  55  * Does the given texture wrap mode allow sampling the texture border color?
  56  * XXX maybe move this into gallium util code.
  57  */
  58 boolean
  59 lp_sampler_wrap_mode_uses_border_color(unsigned mode,
  60                                        unsigned min_img_filter,
  61                                        unsigned mag_img_filter)
  62 {
  63    switch (mode) {
  64    case PIPE_TEX_WRAP_REPEAT:
  65    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
  66    case PIPE_TEX_WRAP_MIRROR_REPEAT:
  67    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
  68       return FALSE;
  69    case PIPE_TEX_WRAP_CLAMP:
  70    case PIPE_TEX_WRAP_MIRROR_CLAMP:
  71       if (min_img_filter == PIPE_TEX_FILTER_NEAREST &&
  72           mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
  73          return FALSE;
  74       } else {
  75          return TRUE;
  76       }
  77    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
  78    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
  79       return TRUE;
  80    default:
  81       assert(0 && "unexpected wrap mode");
  82       return FALSE;
  83    }
  84 }
  85
  86
  87 /**
  88  * Initialize lp_sampler_static_state object with the gallium sampler
  89  * and texture state.
  90  * The former is considered to be static and the later dynamic.
  91  */
  92 void
  93 lp_sampler_static_state(struct lp_sampler_static_state *state,
  94                         const struct pipe_sampler_view *view,
  95                         const struct pipe_sampler_state *sampler)
  96 {
  97    const struct pipe_resource *texture = view->texture;
  98
  99    memset(state, 0, sizeof *state);
 100
 101    if(!texture)
 102       return;
 103
 104    if(!sampler)
 105       return;
 106
 107    /*
 108     * We don't copy sampler state over unless it is actually enabled, to avoid
 109     * spurious recompiles, as the sampler static state is part of the shader
 110     * key.
 111     *
 112     * Ideally the state tracker or cso_cache module would make all state
 113     * canonical, but until that happens it's better to be safe than sorry here.
 114     *
 115     * XXX: Actually there's much more than can be done here, especially
 116     * regarding 1D/2D/3D/CUBE textures, wrap modes, etc.
 117     */
 118
 119    state->format            = view->format;
 120    state->swizzle_r         = view->swizzle_r;
 121    state->swizzle_g         = view->swizzle_g;
 122    state->swizzle_b         = view->swizzle_b;
 123    state->swizzle_a         = view->swizzle_a;
 124
 125    state->target            = texture->target;
 126    state->pot_width         = util_is_power_of_two(texture->width0);
 127    state->pot_height        = util_is_power_of_two(texture->height0);
 128    state->pot_depth         = util_is_power_of_two(texture->depth0);
 129
 130    state->wrap_s            = sampler->wrap_s;
 131    state->wrap_t            = sampler->wrap_t;
 132    state->wrap_r            = sampler->wrap_r;
 133    state->min_img_filter    = sampler->min_img_filter;
 134    state->mag_img_filter    = sampler->mag_img_filter;
 135
 136    if (view->u.tex.last_level && sampler->max_lod > 0.0f) {
 137       state->min_mip_filter = sampler->min_mip_filter;
 138    } else {
 139       state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
 140    }
 141
 142    if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
 143       if (sampler->lod_bias != 0.0f) {
 144          state->lod_bias_non_zero = 1;
 145       }
 146
 147       /* If min_lod == max_lod we can greatly simplify mipmap selection.
 148        * This is a case that occurs during automatic mipmap generation.
 149        */
 150       if (sampler->min_lod == sampler->max_lod) {
 151          state->min_max_lod_equal = 1;
 152       } else {
 153          if (sampler->min_lod > 0.0f) {
 154             state->apply_min_lod = 1;
 155          }
 156
 157          if (sampler->max_lod < (float)view->u.tex.last_level) {
 158             state->apply_max_lod = 1;
 159          }
 160       }
 161    }
 162
 163    state->compare_mode      = sampler->compare_mode;
 164    if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
 165       state->compare_func   = sampler->compare_func;
 166    }
 167
 168    state->normalized_coords = sampler->normalized_coords;
 169
 170    /*
 171     * FIXME: Handle the remainder of pipe_sampler_view.
 172     */
 173 }
 174
 175
 176 /**
 177  * Generate code to compute coordinate gradient (rho).
 178  * \param ddx  partial derivatives of (s, t, r, q) with respect to X
 179  * \param ddy  partial derivatives of (s, t, r, q) with respect to Y
 180  *
 181  * XXX: The resulting rho is scalar, so we ignore all but the first element of
 182  * derivatives that are passed by the shader.
 183  */
 184 static LLVMValueRef
 185 lp_build_rho(struct lp_build_sample_context *bld,
 186              unsigned unit,
 187              const LLVMValueRef ddx[4],
 188              const LLVMValueRef ddy[4])
 189 {
 190    struct lp_build_context *int_size_bld = &bld->int_size_bld;
 191    struct lp_build_context *float_size_bld = &bld->float_size_bld;
 192    struct lp_build_context *float_bld = &bld->float_bld;
 193    const unsigned dims = bld->dims;
 194    LLVMBuilderRef builder = bld->gallivm->builder;
 195    LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
 196    LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
 197    LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
 198    LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0);
 199    LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
 200    LLVMValueRef rho_x, rho_y;
 201    LLVMValueRef rho_vec;
 202    LLVMValueRef int_size, float_size;
 203    LLVMValueRef rho;
 204    LLVMValueRef first_level, first_level_vec;
 205
 206    dsdx = ddx[0];
 207    dsdy = ddy[0];
 208
 209    if (dims <= 1) {
 210       rho_x = dsdx;
 211       rho_y = dsdy;
 212    }
 213    else {
 214       rho_x = float_size_bld->undef;
 215       rho_y = float_size_bld->undef;
 216
 217       rho_x = LLVMBuildInsertElement(builder, rho_x, dsdx, index0, "");
 218       rho_y = LLVMBuildInsertElement(builder, rho_y, dsdy, index0, "");
 219
 220       dtdx = ddx[1];
 221       dtdy = ddy[1];
 222
 223       rho_x = LLVMBuildInsertElement(builder, rho_x, dtdx, index1, "");
 224       rho_y = LLVMBuildInsertElement(builder, rho_y, dtdy, index1, "");
 225
 226       if (dims >= 3) {
 227          drdx = ddx[2];
 228          drdy = ddy[2];
 229
 230          rho_x = LLVMBuildInsertElement(builder, rho_x, drdx, index2, "");
 231          rho_y = LLVMBuildInsertElement(builder, rho_y, drdy, index2, "");
 232       }
 233    }
 234
 235    rho_x = lp_build_abs(float_size_bld, rho_x);
 236    rho_y = lp_build_abs(float_size_bld, rho_y);
 237
 238    rho_vec = lp_build_max(float_size_bld, rho_x, rho_y);
 239
 240    first_level = bld->dynamic_state->first_level(bld->dynamic_state,
 241                                                  bld->gallivm, unit);
 242    first_level_vec = lp_build_broadcast_scalar(&bld->int_size_bld, first_level);
 243    int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec);
 244    float_size = lp_build_int_to_float(float_size_bld, int_size);
 245
 246    rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);
 247
 248    if (dims <= 1) {
 249       rho = rho_vec;
 250    }
 251    else {
 252       if (dims >= 2) {
 253          LLVMValueRef rho_s, rho_t, rho_r;
 254
 255          rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, "");
 256          rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, "");
 257
 258          rho = lp_build_max(float_bld, rho_s, rho_t);
 259
 260          if (dims >= 3) {
 261             rho_r = LLVMBuildExtractElement(builder, rho_vec, index0, "");
 262             rho = lp_build_max(float_bld, rho, rho_r);
 263          }
 264       }
 265    }
 266
 267    return rho;
 268 }
 269
 270
 271 /*
 272  * Bri-linear lod computation
 273  *
 274  * Use a piece-wise linear approximation of log2 such that:
 275  * - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc.
 276  * - linear approximation for values in the neighborhood of 0.5, 1.5., etc,
 277  *   with the steepness specified in 'factor'
 278  * - exact result for 0.5, 1.5, etc.
 279  *
 280  *
 281  *   1.0 -              /----*
 282  *                     /
 283  *                    /
 284  *                   /
 285  *   0.5 -          *
 286  *                 /
 287  *                /
 288  *               /
 289  *   0.0 - *----/
 290  *
 291  *         |                 |
 292  *        2^0               2^1
 293  *
 294  * This is a technique also commonly used in hardware:
 295  * - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html
 296  *
 297  * TODO: For correctness, this should only be applied when texture is known to
 298  * have regular mipmaps, i.e., mipmaps derived from the base level.
 299  *
 300  * TODO: This could be done in fixed point, where applicable.
 301  */
 302 static void
 303 lp_build_brilinear_lod(struct lp_build_context *bld,
 304                        LLVMValueRef lod,
 305                        double factor,
 306                        LLVMValueRef *out_lod_ipart,
 307                        LLVMValueRef *out_lod_fpart)
 308 {
 309    LLVMValueRef lod_fpart;
 310    double pre_offset = (factor - 0.5)/factor - 0.5;
 311    double post_offset = 1 - factor;
 312
 313    if (0) {
 314       lp_build_printf(bld->gallivm, "lod = %f\n", lod);
 315    }
 316
 317    lod = lp_build_add(bld, lod,
 318                       lp_build_const_vec(bld->gallivm, bld->type, pre_offset));
 319
 320    lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart);
 321
 322    lod_fpart = lp_build_mul(bld, lod_fpart,
 323                             lp_build_const_vec(bld->gallivm, bld->type, factor));
 324
 325    lod_fpart = lp_build_add(bld, lod_fpart,
 326                             lp_build_const_vec(bld->gallivm, bld->type, post_offset));
 327
 328    /*
 329     * It's not necessary to clamp lod_fpart since:
 330     * - the above expression will never produce numbers greater than one.
 331     * - the mip filtering branch is only taken if lod_fpart is positive
 332     */
 333
 334    *out_lod_fpart = lod_fpart;
 335
 336    if (0) {
 337       lp_build_printf(bld->gallivm, "lod_ipart = %i\n", *out_lod_ipart);
 338       lp_build_printf(bld->gallivm, "lod_fpart = %f\n\n", *out_lod_fpart);
 339    }
 340 }
 341
 342
 343 /*
 344  * Combined log2 and brilinear lod computation.
 345  *
 346  * It's in all identical to calling lp_build_fast_log2() and
 347  * lp_build_brilinear_lod() above, but by combining we can compute the integer
 348  * and fractional part independently.
 349  */
 350 static void
 351 lp_build_brilinear_rho(struct lp_build_context *bld,
 352                        LLVMValueRef rho,
 353                        double factor,
 354                        LLVMValueRef *out_lod_ipart,
 355                        LLVMValueRef *out_lod_fpart)
 356 {
 357    LLVMValueRef lod_ipart;
 358    LLVMValueRef lod_fpart;
 359
 360    const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor);
 361    const double post_offset = 1 - 2*factor;
 362
 363    assert(bld->type.floating);
 364
 365    assert(lp_check_value(bld->type, rho));
 366
 367    /*
 368     * The pre factor will make the intersections with the exact powers of two
 369     * happen precisely where we want then to be, which means that the integer
 370     * part will not need any post adjustments.
 371     */
 372    rho = lp_build_mul(bld, rho,
 373                       lp_build_const_vec(bld->gallivm, bld->type, pre_factor));
 374
 375    /* ipart = ifloor(log2(rho)) */
 376    lod_ipart = lp_build_extract_exponent(bld, rho, 0);
 377
 378    /* fpart = rho / 2**ipart */
 379    lod_fpart = lp_build_extract_mantissa(bld, rho);
 380
 381    lod_fpart = lp_build_mul(bld, lod_fpart,
 382                             lp_build_const_vec(bld->gallivm, bld->type, factor));
 383
 384    lod_fpart = lp_build_add(bld, lod_fpart,
 385                             lp_build_const_vec(bld->gallivm, bld->type, post_offset));
 386
 387    /*
 388     * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since:
 389     * - the above expression will never produce numbers greater than one.
 390     * - the mip filtering branch is only taken if lod_fpart is positive
 391     */
 392
 393    *out_lod_ipart = lod_ipart;
 394    *out_lod_fpart = lod_fpart;
 395 }
 396
 397
 398 /**
 399  * Generate code to compute texture level of detail (lambda).
 400  * \param ddx  partial derivatives of (s, t, r, q) with respect to X
 401  * \param ddy  partial derivatives of (s, t, r, q) with respect to Y
 402  * \param lod_bias  optional float vector with the shader lod bias
 403  * \param explicit_lod  optional float vector with the explicit lod
 404  * \param width  scalar int texture width
 405  * \param height  scalar int texture height
 406  * \param depth  scalar int texture depth
 407  *
 408  * XXX: The resulting lod is scalar, so ignore all but the first element of
 409  * derivatives, lod_bias, etc that are passed by the shader.
 410  */
 411 void
 412 lp_build_lod_selector(struct lp_build_sample_context *bld,
 413                       unsigned unit,
 414                       const LLVMValueRef ddx[4],
 415                       const LLVMValueRef ddy[4],
 416                       LLVMValueRef lod_bias, /* optional */
 417                       LLVMValueRef explicit_lod, /* optional */
 418                       unsigned mip_filter,
 419                       LLVMValueRef *out_lod_ipart,
 420                       LLVMValueRef *out_lod_fpart)
 421
 422 {
 423    LLVMBuilderRef builder = bld->gallivm->builder;
 424    struct lp_build_context *float_bld = &bld->float_bld;
 425    LLVMValueRef lod;
 426
 427    *out_lod_ipart = bld->int_bld.zero;
 428    *out_lod_fpart = bld->float_bld.zero;
 429
 430    if (bld->static_state->min_max_lod_equal) {
 431       /* User is forcing sampling from a particular mipmap level.
 432        * This is hit during mipmap generation.
 433        */
 434       LLVMValueRef min_lod =
 435          bld->dynamic_state->min_lod(bld->dynamic_state, bld->gallivm, unit);
 436
 437       lod = min_lod;
 438    }
 439    else {
 440       LLVMValueRef sampler_lod_bias =
 441          bld->dynamic_state->lod_bias(bld->dynamic_state, bld->gallivm, unit);
 442       LLVMValueRef index0 = lp_build_const_int32(bld->gallivm, 0);
 443
 444       if (explicit_lod) {
 445          lod = LLVMBuildExtractElement(builder, explicit_lod,
 446                                        index0, "");
 447       }
 448       else {
 449          LLVMValueRef rho;
 450
 451          rho = lp_build_rho(bld, unit, ddx, ddy);
 452
 453          /*
 454           * Compute lod = log2(rho)
 455           */
 456
 457          if (!lod_bias &&
 458              !bld->static_state->lod_bias_non_zero &&
 459              !bld->static_state->apply_max_lod &&
 460              !bld->static_state->apply_min_lod) {
 461             /*
 462              * Special case when there are no post-log2 adjustments, which
 463              * saves instructions but keeping the integer and fractional lod
 464              * computations separate from the start.
 465              */
 466
 467             if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
 468                 mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
 469                *out_lod_ipart = lp_build_ilog2(float_bld, rho);
 470                *out_lod_fpart = bld->float_bld.zero;
 471                return;
 472             }
 473             if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
 474                 !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
 475                lp_build_brilinear_rho(float_bld, rho, BRILINEAR_FACTOR,
 476                                       out_lod_ipart, out_lod_fpart);
 477                return;
 478             }
 479          }
 480
 481          if (0) {
 482             lod = lp_build_log2(float_bld, rho);
 483          }
 484          else {
 485             lod = lp_build_fast_log2(float_bld, rho);
 486          }
 487
 488          /* add shader lod bias */
 489          if (lod_bias) {
 490             lod_bias = LLVMBuildExtractElement(builder, lod_bias,
 491                                                index0, "");
 492             lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
 493          }
 494       }
 495
 496       /* add sampler lod bias */
 497       if (bld->static_state->lod_bias_non_zero)
 498          lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias");
 499
 500
 501       /* clamp lod */
 502       if (bld->static_state->apply_max_lod) {
 503          LLVMValueRef max_lod =
 504             bld->dynamic_state->max_lod(bld->dynamic_state, bld->gallivm, unit);
 505
 506          lod = lp_build_min(float_bld, lod, max_lod);
 507       }
 508       if (bld->static_state->apply_min_lod) {
 509          LLVMValueRef min_lod =
 510             bld->dynamic_state->min_lod(bld->dynamic_state, bld->gallivm, unit);
 511
 512          lod = lp_build_max(float_bld, lod, min_lod);
 513       }
 514    }
 515
 516    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
 517       if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
 518          lp_build_brilinear_lod(float_bld, lod, BRILINEAR_FACTOR,
 519                                 out_lod_ipart, out_lod_fpart);
 520       }
 521       else {
 522          lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, out_lod_fpart);
 523       }
 524
 525       lp_build_name(*out_lod_fpart, "lod_fpart");
 526    }
 527    else {
 528       *out_lod_ipart = lp_build_iround(float_bld, lod);
 529    }
 530
 531    lp_build_name(*out_lod_ipart, "lod_ipart");
 532
 533    return;
 534 }
 535
 536
 537 /**
 538  * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
 539  * mipmap level index.
 540  * Note: this is all scalar code.
 541  * \param lod  scalar float texture level of detail
 542  * \param level_out  returns integer
 543  */
 544 void
 545 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
 546                            unsigned unit,
 547                            LLVMValueRef lod_ipart,
 548                            LLVMValueRef *level_out)
 549 {
 550    struct lp_build_context *int_bld = &bld->int_bld;
 551    LLVMValueRef first_level, last_level, level;
 552
 553    first_level = bld->dynamic_state->first_level(bld->dynamic_state,
 554                                                  bld->gallivm, unit);
 555    last_level = bld->dynamic_state->last_level(bld->dynamic_state,
 556                                                bld->gallivm, unit);
 557
 558    /* convert float lod to integer */
 559    level = lp_build_add(int_bld, lod_ipart, first_level);
 560
 561    /* clamp level to legal range of levels */
 562    *level_out = lp_build_clamp(int_bld, level, first_level, last_level);
 563 }
 564
 565
 566 /**
 567  * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
 568  * two (adjacent) mipmap level indexes.  Later, we'll sample from those
 569  * two mipmap levels and interpolate between them.
 570  */
 571 void
 572 lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
 573                            unsigned unit,
 574                            LLVMValueRef lod_ipart,
 575                            LLVMValueRef *lod_fpart_inout,
 576                            LLVMValueRef *level0_out,
 577                            LLVMValueRef *level1_out)
 578 {
 579    LLVMBuilderRef builder = bld->gallivm->builder;
 580    struct lp_build_context *int_bld = &bld->int_bld;
 581    struct lp_build_context *float_bld = &bld->float_bld;
 582    LLVMValueRef first_level, last_level;
 583    LLVMValueRef clamp_min;
 584    LLVMValueRef clamp_max;
 585
 586    first_level = bld->dynamic_state->first_level(bld->dynamic_state,
 587                                                  bld->gallivm, unit);
 588
 589    *level0_out = lp_build_add(int_bld, lod_ipart, first_level);
 590    *level1_out = lp_build_add(int_bld, *level0_out, int_bld->one);
 591
 592    last_level = bld->dynamic_state->last_level(bld->dynamic_state,
 593                                                bld->gallivm, unit);
 594
 595    /*
 596     * Clamp both *level0_out and *level1_out to [first_level, last_level], with
 597     * the minimum number of comparisons, and zeroing lod_fpart in the extreme
 598     * ends in the process.
 599     */
 600
 601    /* *level0_out < first_level */
 602    clamp_min = LLVMBuildICmp(builder, LLVMIntSLT,
 603                              *level0_out, first_level,
 604                              "clamp_lod_to_first");
 605
 606    *level0_out = LLVMBuildSelect(builder, clamp_min,
 607                                  first_level, *level0_out, "");
 608
 609    *level1_out = LLVMBuildSelect(builder, clamp_min,
 610                                  first_level, *level1_out, "");
 611
 612    *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min,
 613                                       float_bld->zero, *lod_fpart_inout, "");
 614
 615    /* *level0_out >= last_level */
 616    clamp_max = LLVMBuildICmp(builder, LLVMIntSGE,
 617                              *level0_out, last_level,
 618                              "clamp_lod_to_last");
 619
 620    *level0_out = LLVMBuildSelect(builder, clamp_max,
 621                                  last_level, *level0_out, "");
 622
 623    *level1_out = LLVMBuildSelect(builder, clamp_max,
 624                                  last_level, *level1_out, "");
 625
 626    *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max,
 627                                       float_bld->zero, *lod_fpart_inout, "");
 628
 629    lp_build_name(*level0_out, "sampler%u_miplevel0", unit);
 630    lp_build_name(*level1_out, "sampler%u_miplevel1", unit);
 631    lp_build_name(*lod_fpart_inout, "sampler%u_mipweight", unit);
 632 }
 633
 634
 635 /**
 636  * Return pointer to a single mipmap level.
 637  * \param data_array  array of pointers to mipmap levels
 638  * \param level  integer mipmap level
 639  */
 640 LLVMValueRef
 641 lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
 642                           LLVMValueRef level)
 643 {
 644    LLVMBuilderRef builder = bld->gallivm->builder;
 645    LLVMValueRef indexes[2], data_ptr;
 646
 647    indexes[0] = lp_build_const_int32(bld->gallivm, 0);
 648    indexes[1] = level;
 649    data_ptr = LLVMBuildGEP(builder, bld->data_array, indexes, 2, "");
 650    data_ptr = LLVMBuildLoad(builder, data_ptr, "");
 651    return data_ptr;
 652 }
 653
 654
 655 LLVMValueRef
 656 lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
 657                                 int level)
 658 {
 659    LLVMValueRef lvl = lp_build_const_int32(bld->gallivm, level);
 660    return lp_build_get_mipmap_level(bld, lvl);
 661 }
 662
 663
 664 /**
 665  * Codegen equivalent for u_minify().
 666  * Return max(1, base_size >> level);
 667  */
 668 LLVMValueRef
 669 lp_build_minify(struct lp_build_context *bld,
 670                 LLVMValueRef base_size,
 671                 LLVMValueRef level)
 672 {
 673    LLVMBuilderRef builder = bld->gallivm->builder;
 674    assert(lp_check_value(bld->type, base_size));
 675    assert(lp_check_value(bld->type, level));
 676
 677    if (level == bld->zero) {
 678       /* if we're using mipmap level zero, no minification is needed */
 679       return base_size;
 680    }
 681    else {
 682       LLVMValueRef size =
 683          LLVMBuildLShr(builder, base_size, level, "minify");
 684       assert(bld->type.sign);
 685       size = lp_build_max(bld, size, bld->one);
 686       return size;
 687    }
 688 }
 689
 690
 691 /**
 692  * Dereference stride_array[mipmap_level] array to get a stride.
 693  * Return stride as a vector.
 694  */
 695 static LLVMValueRef
 696 lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
 697                               LLVMValueRef stride_array, LLVMValueRef level)
 698 {
 699    LLVMBuilderRef builder = bld->gallivm->builder;
 700    LLVMValueRef indexes[2], stride;
 701    indexes[0] = lp_build_const_int32(bld->gallivm, 0);
 702    indexes[1] = level;
 703    stride = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
 704    stride = LLVMBuildLoad(builder, stride, "");
 705    stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
 706    return stride;
 707 }
 708
 709
 710 /**
 711  * When sampling a mipmap, we need to compute the width, height, depth
 712  * of the source levels from the level indexes.  This helper function
 713  * does that.
 714  */
 715 void
 716 lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
 717                             LLVMValueRef ilevel,
 718                             LLVMValueRef *out_size,
 719                             LLVMValueRef *row_stride_vec,
 720                             LLVMValueRef *img_stride_vec)
 721 {
 722    const unsigned dims = bld->dims;
 723    LLVMValueRef ilevel_vec;
 724
 725    ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
 726
 727    /*
 728     * Compute width, height, depth at mipmap level 'ilevel'
 729     */
 730    *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
 731
 732    if (dims >= 2) {
 733       *row_stride_vec = lp_build_get_level_stride_vec(bld,
 734                                                       bld->row_stride_array,
 735                                                       ilevel);
 736       if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
 737          *img_stride_vec = lp_build_get_level_stride_vec(bld,
 738                                                          bld->img_stride_array,
 739                                                          ilevel);
 740       }
 741    }
 742 }
 743
 744
 745 /**
 746  * Extract and broadcast texture size.
 747  *
 748  * @param size_type   type of the texture size vector (either
 749  *                    bld->int_size_type or bld->float_size_type)
 750  * @param coord_type  type of the texture size vector (either
 751  *                    bld->int_coord_type or bld->coord_type)
 752  * @param int_size    vector with the integer texture size (width, height,
 753  *                    depth)
 754  */
 755 void
 756 lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
 757                              struct lp_type size_type,
 758                              struct lp_type coord_type,
 759                              LLVMValueRef size,
 760                              LLVMValueRef *out_width,
 761                              LLVMValueRef *out_height,
 762                              LLVMValueRef *out_depth)
 763 {
 764    const unsigned dims = bld->dims;
 765    LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
 766
 767    *out_width = lp_build_extract_broadcast(bld->gallivm,
 768                                            size_type,
 769                                            coord_type,
 770                                            size,
 771                                            LLVMConstInt(i32t, 0, 0));
 772    if (dims >= 2) {
 773       *out_height = lp_build_extract_broadcast(bld->gallivm,
 774                                                size_type,
 775                                                coord_type,
 776                                                size,
 777                                                LLVMConstInt(i32t, 1, 0));
 778       if (dims == 3) {
 779          *out_depth = lp_build_extract_broadcast(bld->gallivm,
 780                                                  size_type,
 781                                                  coord_type,
 782                                                  size,
 783                                                  LLVMConstInt(i32t, 2, 0));
 784       }
 785    }
 786 }
 787
 788
 789 /**
 790  * Unnormalize coords.
 791  *
 792  * @param int_size  vector with the integer texture size (width, height, depth)
 793  */
 794 void
 795 lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
 796                              LLVMValueRef flt_size,
 797                              LLVMValueRef *s,
 798                              LLVMValueRef *t,
 799                              LLVMValueRef *r)
 800 {
 801    const unsigned dims = bld->dims;
 802    LLVMValueRef width;
 803    LLVMValueRef height;
 804    LLVMValueRef depth;
 805
 806    lp_build_extract_image_sizes(bld,
 807                                 bld->float_size_type,
 808                                 bld->coord_type,
 809                                 flt_size,
 810                                 &width,
 811                                 &height,
 812                                 &depth);
 813
 814    /* s = s * width, t = t * height */
 815    *s = lp_build_mul(&bld->coord_bld, *s, width);
 816    if (dims >= 2) {
 817       *t = lp_build_mul(&bld->coord_bld, *t, height);
 818       if (dims >= 3) {
 819          *r = lp_build_mul(&bld->coord_bld, *r, depth);
 820       }
 821    }
 822 }
 823
 824
 825 /** Helper used by lp_build_cube_lookup() */
 826 static LLVMValueRef
 827 lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
 828 {
 829    /* ima = -0.5 / abs(coord); */
 830    LLVMValueRef negHalf = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, -0.5);
 831    LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
 832    LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord);
 833    return ima;
 834 }
 835
 836
 837 /**
 838  * Helper used by lp_build_cube_lookup()
 839  * \param sign  scalar +1 or -1
 840  * \param coord  float vector
 841  * \param ima  float vector
 842  */
 843 static LLVMValueRef
 844 lp_build_cube_coord(struct lp_build_context *coord_bld,
 845                     LLVMValueRef sign, int negate_coord,
 846                     LLVMValueRef coord, LLVMValueRef ima)
 847 {
 848    /* return negate(coord) * ima * sign + 0.5; */
 849    LLVMValueRef half = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, 0.5);
 850    LLVMValueRef res;
 851
 852    assert(negate_coord == +1 || negate_coord == -1);
 853
 854    if (negate_coord == -1) {
 855       coord = lp_build_negate(coord_bld, coord);
 856    }
 857
 858    res = lp_build_mul(coord_bld, coord, ima);
 859    if (sign) {
 860       sign = lp_build_broadcast_scalar(coord_bld, sign);
 861       res = lp_build_mul(coord_bld, res, sign);
 862    }
 863    res = lp_build_add(coord_bld, res, half);
 864
 865    return res;
 866 }
 867
 868
 869 /** Helper used by lp_build_cube_lookup()
 870  * Return (major_coord >= 0) ? pos_face : neg_face;
 871  */
 872 static LLVMValueRef
 873 lp_build_cube_face(struct lp_build_sample_context *bld,
 874                    LLVMValueRef major_coord,
 875                    unsigned pos_face, unsigned neg_face)
 876 {
 877    struct gallivm_state *gallivm = bld->gallivm;
 878    LLVMBuilderRef builder = gallivm->builder;
 879    LLVMValueRef cmp = LLVMBuildFCmp(builder, LLVMRealUGE,
 880                                     major_coord,
 881                                     bld->float_bld.zero, "");
 882    LLVMValueRef pos = lp_build_const_int32(gallivm, pos_face);
 883    LLVMValueRef neg = lp_build_const_int32(gallivm, neg_face);
 884    LLVMValueRef res = LLVMBuildSelect(builder, cmp, pos, neg, "");
 885    return res;
 886 }
 887
 888
 889
 890 /**
 891  * Generate code to do cube face selection and compute per-face texcoords.
 892  */
 893 void
 894 lp_build_cube_lookup(struct lp_build_sample_context *bld,
 895                      LLVMValueRef s,
 896                      LLVMValueRef t,
 897                      LLVMValueRef r,
 898                      LLVMValueRef *face,
 899                      LLVMValueRef *face_s,
 900                      LLVMValueRef *face_t)
 901 {
 902    struct lp_build_context *float_bld = &bld->float_bld;
 903    struct lp_build_context *coord_bld = &bld->coord_bld;
 904    LLVMBuilderRef builder = bld->gallivm->builder;
 905    LLVMValueRef rx, ry, rz;
 906    LLVMValueRef arx, ary, arz;
 907    LLVMValueRef c25 = lp_build_const_float(bld->gallivm, 0.25);
 908    LLVMValueRef arx_ge_ary, arx_ge_arz;
 909    LLVMValueRef ary_ge_arx, ary_ge_arz;
 910    LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;
 911
 912    assert(bld->coord_bld.type.length == 4);
 913
 914    /*
 915     * Use the average of the four pixel's texcoords to choose the face.
 916     */
 917    rx = lp_build_mul(float_bld, c25,
 918                      lp_build_sum_vector(&bld->coord_bld, s));
 919    ry = lp_build_mul(float_bld, c25,
 920                      lp_build_sum_vector(&bld->coord_bld, t));
 921    rz = lp_build_mul(float_bld, c25,
 922                      lp_build_sum_vector(&bld->coord_bld, r));
 923
 924    arx = lp_build_abs(float_bld, rx);
 925    ary = lp_build_abs(float_bld, ry);
 926    arz = lp_build_abs(float_bld, rz);
 927
 928    /*
 929     * Compare sign/magnitude of rx,ry,rz to determine face
 930     */
 931    arx_ge_ary = LLVMBuildFCmp(builder, LLVMRealUGE, arx, ary, "");
 932    arx_ge_arz = LLVMBuildFCmp(builder, LLVMRealUGE, arx, arz, "");
 933    ary_ge_arx = LLVMBuildFCmp(builder, LLVMRealUGE, ary, arx, "");
 934    ary_ge_arz = LLVMBuildFCmp(builder, LLVMRealUGE, ary, arz, "");
 935
 936    arx_ge_ary_arz = LLVMBuildAnd(builder, arx_ge_ary, arx_ge_arz, "");
 937    ary_ge_arx_arz = LLVMBuildAnd(builder, ary_ge_arx, ary_ge_arz, "");
 938
 939    {
 940       struct lp_build_if_state if_ctx;
 941       LLVMValueRef face_s_var;
 942       LLVMValueRef face_t_var;
 943       LLVMValueRef face_var;
 944
 945       face_s_var = lp_build_alloca(bld->gallivm, bld->coord_bld.vec_type, "face_s_var");
 946       face_t_var = lp_build_alloca(bld->gallivm, bld->coord_bld.vec_type, "face_t_var");
 947       face_var = lp_build_alloca(bld->gallivm, bld->int_bld.vec_type, "face_var");
 948
 949       lp_build_if(&if_ctx, bld->gallivm, arx_ge_ary_arz);
 950       {
 951          /* +/- X face */
 952          LLVMValueRef sign = lp_build_sgn(float_bld, rx);
 953          LLVMValueRef ima = lp_build_cube_ima(coord_bld, s);
 954          *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
 955          *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
 956          *face = lp_build_cube_face(bld, rx,
 957                                     PIPE_TEX_FACE_POS_X,
 958                                     PIPE_TEX_FACE_NEG_X);
 959          LLVMBuildStore(builder, *face_s, face_s_var);
 960          LLVMBuildStore(builder, *face_t, face_t_var);
 961          LLVMBuildStore(builder, *face, face_var);
 962       }
 963       lp_build_else(&if_ctx);
 964       {
 965          struct lp_build_if_state if_ctx2;
 966
 967          lp_build_if(&if_ctx2, bld->gallivm, ary_ge_arx_arz);
 968          {
 969             /* +/- Y face */
 970             LLVMValueRef sign = lp_build_sgn(float_bld, ry);
 971             LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
 972             *face_s = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
 973             *face_t = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
 974             *face = lp_build_cube_face(bld, ry,
 975                                        PIPE_TEX_FACE_POS_Y,
 976                                        PIPE_TEX_FACE_NEG_Y);
 977             LLVMBuildStore(builder, *face_s, face_s_var);
 978             LLVMBuildStore(builder, *face_t, face_t_var);
 979             LLVMBuildStore(builder, *face, face_var);
 980          }
 981          lp_build_else(&if_ctx2);
 982          {
 983             /* +/- Z face */
 984             LLVMValueRef sign = lp_build_sgn(float_bld, rz);
 985             LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
 986             *face_s = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
 987             *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
 988             *face = lp_build_cube_face(bld, rz,
 989                                        PIPE_TEX_FACE_POS_Z,
 990                                        PIPE_TEX_FACE_NEG_Z);
 991             LLVMBuildStore(builder, *face_s, face_s_var);
 992             LLVMBuildStore(builder, *face_t, face_t_var);
 993             LLVMBuildStore(builder, *face, face_var);
 994          }
 995          lp_build_endif(&if_ctx2);
 996       }
 997
 998       lp_build_endif(&if_ctx);
 999
1000       *face_s = LLVMBuildLoad(builder, face_s_var, "face_s");
1001       *face_t = LLVMBuildLoad(builder, face_t_var, "face_t");
1002       *face   = LLVMBuildLoad(builder, face_var, "face");
1003    }
1004 }
1005
1006
1007 /**
1008  * Compute the partial offset of a pixel block along an arbitrary axis.
1009  *
1010  * @param coord   coordinate in pixels
1011  * @param stride  number of bytes between rows of successive pixel blocks
1012  * @param block_length  number of pixels in a pixels block along the coordinate
1013  *                      axis
1014  * @param out_offset    resulting relative offset of the pixel block in bytes
1015  * @param out_subcoord  resulting sub-block pixel coordinate
1016  */
1017 void
1018 lp_build_sample_partial_offset(struct lp_build_context *bld,
1019                                unsigned block_length,
1020                                LLVMValueRef coord,
1021                                LLVMValueRef stride,
1022                                LLVMValueRef *out_offset,
1023                                LLVMValueRef *out_subcoord)
1024 {
1025    LLVMBuilderRef builder = bld->gallivm->builder;
1026    LLVMValueRef offset;
1027    LLVMValueRef subcoord;
1028
1029    if (block_length == 1) {
1030       subcoord = bld->zero;
1031    }
1032    else {
1033       /*
1034        * Pixel blocks have power of two dimensions. LLVM should convert the
1035        * rem/div to bit arithmetic.
1036        * TODO: Verify this.
1037        * It does indeed BUT it does transform it to scalar (and back) when doing so
1038        * (using roughly extract, shift/and, mov, unpack) (llvm 2.7).
1039        * The generated code looks seriously unfunny and is quite expensive.
1040        */
1041 #if 0
1042       LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
1043       subcoord = LLVMBuildURem(builder, coord, block_width, "");
1044       coord    = LLVMBuildUDiv(builder, coord, block_width, "");
1045 #else
1046       unsigned logbase2 = util_logbase2(block_length);
1047       LLVMValueRef block_shift = lp_build_const_int_vec(bld->gallivm, bld->type, logbase2);
1048       LLVMValueRef block_mask = lp_build_const_int_vec(bld->gallivm, bld->type, block_length - 1);
1049       subcoord = LLVMBuildAnd(builder, coord, block_mask, "");
1050       coord = LLVMBuildLShr(builder, coord, block_shift, "");
1051 #endif
1052    }
1053
1054    offset = lp_build_mul(bld, coord, stride);
1055
1056    assert(out_offset);
1057    assert(out_subcoord);
1058
1059    *out_offset = offset;
1060    *out_subcoord = subcoord;
1061 }
1062
1063
1064 /**
1065  * Compute the offset of a pixel block.
1066  *
1067  * x, y, z, y_stride, z_stride are vectors, and they refer to pixels.
1068  *
1069  * Returns the relative offset and i,j sub-block coordinates
1070  */
1071 void
1072 lp_build_sample_offset(struct lp_build_context *bld,
1073                        const struct util_format_description *format_desc,
1074                        LLVMValueRef x,
1075                        LLVMValueRef y,
1076                        LLVMValueRef z,
1077                        LLVMValueRef y_stride,
1078                        LLVMValueRef z_stride,
1079                        LLVMValueRef *out_offset,
1080                        LLVMValueRef *out_i,
1081                        LLVMValueRef *out_j)
1082 {
1083    LLVMValueRef x_stride;
1084    LLVMValueRef offset;
1085
1086    x_stride = lp_build_const_vec(bld->gallivm, bld->type,
1087                                  format_desc->block.bits/8);
1088
1089    lp_build_sample_partial_offset(bld,
1090                                   format_desc->block.width,
1091                                   x, x_stride,
1092                                   &offset, out_i);
1093
1094    if (y && y_stride) {
1095       LLVMValueRef y_offset;
1096       lp_build_sample_partial_offset(bld,
1097                                      format_desc->block.height,
1098                                      y, y_stride,
1099                                      &y_offset, out_j);
1100       offset = lp_build_add(bld, offset, y_offset);
1101    }
1102    else {
1103       *out_j = bld->zero;
1104    }
1105
1106    if (z && z_stride) {
1107       LLVMValueRef z_offset;
1108       LLVMValueRef k;
1109       lp_build_sample_partial_offset(bld,
1110                                      1, /* pixel blocks are always 2D */
1111                                      z, z_stride,
1112                                      &z_offset, &k);
1113       offset = lp_build_add(bld, offset, z_offset);
1114    }
1115
1116    *out_offset = offset;
1117 }