src/gallium/auxiliary/gallivm/lp_bld_sample.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * Texture sampling -- common code.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  */
  34
  35 #include "pipe/p_defines.h"
  36 #include "pipe/p_state.h"
  37 #include "util/u_format.h"
  38 #include "util/u_math.h"
  39 #include "lp_bld_arit.h"
  40 #include "lp_bld_const.h"
  41 #include "lp_bld_debug.h"
  42 #include "lp_bld_printf.h"
  43 #include "lp_bld_flow.h"
  44 #include "lp_bld_sample.h"
  45 #include "lp_bld_swizzle.h"
  46 #include "lp_bld_type.h"
  47
  48
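/*
 * Bri-linear factor: the steepness of the linear segment used by the
 * bri-linear lod approximation (passed as 'factor' to
 * lp_build_brilinear_lod() and lp_build_brilinear_rho()).
 * Should be greater than one.
 */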
  52 #define BRILINEAR_FACTOR 2
  53
  54
  55 /**
  56  * Does the given texture wrap mode allow sampling the texture border color?
  57  * XXX maybe move this into gallium util code.
  58  */
  59 boolean
  60 lp_sampler_wrap_mode_uses_border_color(unsigned mode,
  61                                        unsigned min_img_filter,
  62                                        unsigned mag_img_filter)
  63 {
  64    switch (mode) {
  65    case PIPE_TEX_WRAP_REPEAT:
  66    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
  67    case PIPE_TEX_WRAP_MIRROR_REPEAT:
  68    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
  69       return FALSE;
  70    case PIPE_TEX_WRAP_CLAMP:
  71    case PIPE_TEX_WRAP_MIRROR_CLAMP:
  72       if (min_img_filter == PIPE_TEX_FILTER_NEAREST &&
  73           mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
  74          return FALSE;
  75       } else {
  76          return TRUE;
  77       }
  78    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
  79    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
  80       return TRUE;
  81    default:
  82       assert(0 && "unexpected wrap mode");
  83       return FALSE;
  84    }
  85 }
  86
  87
  88 /**
  89  * Initialize lp_sampler_static_state object with the gallium sampler
  90  * and texture state.
  91  * The former is considered to be static and the later dynamic.
  92  */
  93 void
  94 lp_sampler_static_state(struct lp_sampler_static_state *state,
  95                         const struct pipe_sampler_view *view,
  96                         const struct pipe_sampler_state *sampler)
  97 {
  98    const struct pipe_resource *texture = view->texture;
  99
 100    memset(state, 0, sizeof *state);
 101
 102    if(!texture)
 103       return;
 104
 105    if(!sampler)
 106       return;
 107
 108    /*
 109     * We don't copy sampler state over unless it is actually enabled, to avoid
 110     * spurious recompiles, as the sampler static state is part of the shader
 111     * key.
 112     *
 113     * Ideally the state tracker or cso_cache module would make all state
 114     * canonical, but until that happens it's better to be safe than sorry here.
 115     *
 116     * XXX: Actually there's much more than can be done here, especially
 117     * regarding 1D/2D/3D/CUBE textures, wrap modes, etc.
 118     */
 119
 120    state->format            = view->format;
 121    state->swizzle_r         = view->swizzle_r;
 122    state->swizzle_g         = view->swizzle_g;
 123    state->swizzle_b         = view->swizzle_b;
 124    state->swizzle_a         = view->swizzle_a;
 125
 126    state->target            = texture->target;
 127    state->pot_width         = util_is_power_of_two(texture->width0);
 128    state->pot_height        = util_is_power_of_two(texture->height0);
 129    state->pot_depth         = util_is_power_of_two(texture->depth0);
 130
 131    state->wrap_s            = sampler->wrap_s;
 132    state->wrap_t            = sampler->wrap_t;
 133    state->wrap_r            = sampler->wrap_r;
 134    state->min_img_filter    = sampler->min_img_filter;
 135    state->mag_img_filter    = sampler->mag_img_filter;
 136
 137    if (view->u.tex.last_level && sampler->max_lod > 0.0f) {
 138       state->min_mip_filter = sampler->min_mip_filter;
 139    } else {
 140       state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
 141    }
 142
 143    if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
 144       if (sampler->lod_bias != 0.0f) {
 145          state->lod_bias_non_zero = 1;
 146       }
 147
 148       /* If min_lod == max_lod we can greatly simplify mipmap selection.
 149        * This is a case that occurs during automatic mipmap generation.
 150        */
 151       if (sampler->min_lod == sampler->max_lod) {
 152          state->min_max_lod_equal = 1;
 153       } else {
 154          if (sampler->min_lod > 0.0f) {
 155             state->apply_min_lod = 1;
 156          }
 157
 158          if (sampler->max_lod < (float)view->u.tex.last_level) {
 159             state->apply_max_lod = 1;
 160          }
 161       }
 162    }
 163
 164    state->compare_mode      = sampler->compare_mode;
 165    if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
 166       state->compare_func   = sampler->compare_func;
 167    }
 168
 169    state->normalized_coords = sampler->normalized_coords;
 170
 171    /*
 172     * FIXME: Handle the remainder of pipe_sampler_view.
 173     */
 174 }
 175
 176
 177 /**
 178  * Generate code to compute coordinate gradient (rho).
 179  * \param ddx  partial derivatives of (s, t, r, q) with respect to X
 180  * \param ddy  partial derivatives of (s, t, r, q) with respect to Y
 181  *
 182  * XXX: The resulting rho is scalar, so we ignore all but the first element of
 183  * derivatives that are passed by the shader.
 184  */
 185 static LLVMValueRef
 186 lp_build_rho(struct lp_build_sample_context *bld,
 187              const LLVMValueRef ddx[4],
 188              const LLVMValueRef ddy[4])
 189 {
 190    struct lp_build_context *float_size_bld = &bld->float_size_bld;
 191    struct lp_build_context *float_bld = &bld->float_bld;
 192    const unsigned dims = bld->dims;
 193    LLVMBuilderRef builder = bld->gallivm->builder;
 194    LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
 195    LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
 196    LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
 197    LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0);
 198    LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
 199    LLVMValueRef rho_x, rho_y;
 200    LLVMValueRef rho_vec;
 201    LLVMValueRef float_size;
 202    LLVMValueRef rho;
 203
 204    dsdx = ddx[0];
 205    dsdy = ddy[0];
 206
 207    if (dims <= 1) {
 208       rho_x = dsdx;
 209       rho_y = dsdy;
 210    }
 211    else {
 212       rho_x = float_size_bld->undef;
 213       rho_y = float_size_bld->undef;
 214
 215       rho_x = LLVMBuildInsertElement(builder, rho_x, dsdx, index0, "");
 216       rho_y = LLVMBuildInsertElement(builder, rho_y, dsdy, index0, "");
 217
 218       dtdx = ddx[1];
 219       dtdy = ddy[1];
 220
 221       rho_x = LLVMBuildInsertElement(builder, rho_x, dtdx, index1, "");
 222       rho_y = LLVMBuildInsertElement(builder, rho_y, dtdy, index1, "");
 223
 224       if (dims >= 3) {
 225          drdx = ddx[2];
 226          drdy = ddy[2];
 227
 228          rho_x = LLVMBuildInsertElement(builder, rho_x, drdx, index2, "");
 229          rho_y = LLVMBuildInsertElement(builder, rho_y, drdy, index2, "");
 230       }
 231    }
 232
 233    rho_x = lp_build_abs(float_size_bld, rho_x);
 234    rho_y = lp_build_abs(float_size_bld, rho_y);
 235
 236    rho_vec = lp_build_max(float_size_bld, rho_x, rho_y);
 237
 238    float_size = lp_build_int_to_float(float_size_bld, bld->int_size);
 239
 240    rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);
 241
 242    if (dims <= 1) {
 243       rho = rho_vec;
 244    }
 245    else {
 246       if (dims >= 2) {
 247          LLVMValueRef rho_s, rho_t, rho_r;
 248
 249          rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, "");
 250          rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, "");
 251
 252          rho = lp_build_max(float_bld, rho_s, rho_t);
 253
 254          if (dims >= 3) {
 255             rho_r = LLVMBuildExtractElement(builder, rho_vec, index0, "");
 256             rho = lp_build_max(float_bld, rho, rho_r);
 257          }
 258       }
 259    }
 260
 261    return rho;
 262 }
 263
 264
 265 /*
 266  * Bri-linear lod computation
 267  *
 268  * Use a piece-wise linear approximation of log2 such that:
 269  * - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc.
 270  * - linear approximation for values in the neighborhood of 0.5, 1.5., etc,
 271  *   with the steepness specified in 'factor'
 272  * - exact result for 0.5, 1.5, etc.
 273  *
 274  *
 275  *   1.0 -              /----*
 276  *                     /
 277  *                    /
 278  *                   /
 279  *   0.5 -          *
 280  *                 /
 281  *                /
 282  *               /
 283  *   0.0 - *----/
 284  *
 285  *         |                 |
 286  *        2^0               2^1
 287  *
 288  * This is a technique also commonly used in hardware:
 289  * - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html
 290  *
 291  * TODO: For correctness, this should only be applied when texture is known to
 292  * have regular mipmaps, i.e., mipmaps derived from the base level.
 293  *
 294  * TODO: This could be done in fixed point, where applicable.
 295  */
 296 static void
 297 lp_build_brilinear_lod(struct lp_build_context *bld,
 298                        LLVMValueRef lod,
 299                        double factor,
 300                        LLVMValueRef *out_lod_ipart,
 301                        LLVMValueRef *out_lod_fpart)
 302 {
 303    LLVMValueRef lod_fpart;
 304    double pre_offset = (factor - 0.5)/factor - 0.5;
 305    double post_offset = 1 - factor;
 306
 307    if (0) {
 308       lp_build_printf(bld->gallivm, "lod = %f\n", lod);
 309    }
 310
 311    lod = lp_build_add(bld, lod,
 312                       lp_build_const_vec(bld->gallivm, bld->type, pre_offset));
 313
 314    lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart);
 315
 316    lod_fpart = lp_build_mul(bld, lod_fpart,
 317                             lp_build_const_vec(bld->gallivm, bld->type, factor));
 318
 319    lod_fpart = lp_build_add(bld, lod_fpart,
 320                             lp_build_const_vec(bld->gallivm, bld->type, post_offset));
 321
 322    /*
 323     * It's not necessary to clamp lod_fpart since:
 324     * - the above expression will never produce numbers greater than one.
 325     * - the mip filtering branch is only taken if lod_fpart is positive
 326     */
 327
 328    *out_lod_fpart = lod_fpart;
 329
 330    if (0) {
 331       lp_build_printf(bld->gallivm, "lod_ipart = %i\n", *out_lod_ipart);
 332       lp_build_printf(bld->gallivm, "lod_fpart = %f\n\n", *out_lod_fpart);
 333    }
 334 }
 335
 336
 337 /*
 338  * Combined log2 and brilinear lod computation.
 339  *
 340  * It's in all identical to calling lp_build_fast_log2() and
 341  * lp_build_brilinear_lod() above, but by combining we can compute the interger
 342  * and fractional part independently.
 343  */
 344 static void
 345 lp_build_brilinear_rho(struct lp_build_context *bld,
 346                        LLVMValueRef rho,
 347                        double factor,
 348                        LLVMValueRef *out_lod_ipart,
 349                        LLVMValueRef *out_lod_fpart)
 350 {
 351    LLVMValueRef lod_ipart;
 352    LLVMValueRef lod_fpart;
 353
 354    const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor);
 355    const double post_offset = 1 - 2*factor;
 356
 357    assert(bld->type.floating);
 358
 359    assert(lp_check_value(bld->type, rho));
 360
 361    /*
 362     * The pre factor will make the intersections with the exact powers of two
 363     * happen precisely where we want then to be, which means that the integer
 364     * part will not need any post adjustments.
 365     */
 366    rho = lp_build_mul(bld, rho,
 367                       lp_build_const_vec(bld->gallivm, bld->type, pre_factor));
 368
 369    /* ipart = ifloor(log2(rho)) */
 370    lod_ipart = lp_build_extract_exponent(bld, rho, 0);
 371
 372    /* fpart = rho / 2**ipart */
 373    lod_fpart = lp_build_extract_mantissa(bld, rho);
 374
 375    lod_fpart = lp_build_mul(bld, lod_fpart,
 376                             lp_build_const_vec(bld->gallivm, bld->type, factor));
 377
 378    lod_fpart = lp_build_add(bld, lod_fpart,
 379                             lp_build_const_vec(bld->gallivm, bld->type, post_offset));
 380
 381    /*
 382     * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since:
 383     * - the above expression will never produce numbers greater than one.
 384     * - the mip filtering branch is only taken if lod_fpart is positive
 385     */
 386
 387    *out_lod_ipart = lod_ipart;
 388    *out_lod_fpart = lod_fpart;
 389 }
 390
 391
 392 /**
 393  * Generate code to compute texture level of detail (lambda).
 394  * \param ddx  partial derivatives of (s, t, r, q) with respect to X
 395  * \param ddy  partial derivatives of (s, t, r, q) with respect to Y
 396  * \param lod_bias  optional float vector with the shader lod bias
 397  * \param explicit_lod  optional float vector with the explicit lod
 398  * \param width  scalar int texture width
 399  * \param height  scalar int texture height
 400  * \param depth  scalar int texture depth
 401  *
 402  * XXX: The resulting lod is scalar, so ignore all but the first element of
 403  * derivatives, lod_bias, etc that are passed by the shader.
 404  */
 405 void
 406 lp_build_lod_selector(struct lp_build_sample_context *bld,
 407                       unsigned unit,
 408                       const LLVMValueRef ddx[4],
 409                       const LLVMValueRef ddy[4],
 410                       LLVMValueRef lod_bias, /* optional */
 411                       LLVMValueRef explicit_lod, /* optional */
 412                       unsigned mip_filter,
 413                       LLVMValueRef *out_lod_ipart,
 414                       LLVMValueRef *out_lod_fpart)
 415
 416 {
 417    LLVMBuilderRef builder = bld->gallivm->builder;
 418    struct lp_build_context *float_bld = &bld->float_bld;
 419    LLVMValueRef lod;
 420
 421    *out_lod_ipart = bld->int_bld.zero;
 422    *out_lod_fpart = bld->float_bld.zero;
 423
 424    if (bld->static_state->min_max_lod_equal) {
 425       /* User is forcing sampling from a particular mipmap level.
 426        * This is hit during mipmap generation.
 427        */
 428       LLVMValueRef min_lod =
 429          bld->dynamic_state->min_lod(bld->dynamic_state, bld->gallivm, unit);
 430
 431       lod = min_lod;
 432    }
 433    else {
 434       LLVMValueRef sampler_lod_bias =
 435          bld->dynamic_state->lod_bias(bld->dynamic_state, bld->gallivm, unit);
 436       LLVMValueRef index0 = lp_build_const_int32(bld->gallivm, 0);
 437
 438       if (explicit_lod) {
 439          lod = LLVMBuildExtractElement(builder, explicit_lod,
 440                                        index0, "");
 441       }
 442       else {
 443          LLVMValueRef rho;
 444
 445          rho = lp_build_rho(bld, ddx, ddy);
 446
 447          /*
 448           * Compute lod = log2(rho)
 449           */
 450
 451          if (!lod_bias &&
 452              !bld->static_state->lod_bias_non_zero &&
 453              !bld->static_state->apply_max_lod &&
 454              !bld->static_state->apply_min_lod) {
 455             /*
 456              * Special case when there are no post-log2 adjustments, which
 457              * saves instructions but keeping the integer and fractional lod
 458              * computations separate from the start.
 459              */
 460
 461             if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
 462                 mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
 463                *out_lod_ipart = lp_build_ilog2(float_bld, rho);
 464                *out_lod_fpart = bld->float_bld.zero;
 465                return;
 466             }
 467             if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
 468                 !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
 469                lp_build_brilinear_rho(float_bld, rho, BRILINEAR_FACTOR,
 470                                       out_lod_ipart, out_lod_fpart);
 471                return;
 472             }
 473          }
 474
 475          if (0) {
 476             lod = lp_build_log2(float_bld, rho);
 477          }
 478          else {
 479             lod = lp_build_fast_log2(float_bld, rho);
 480          }
 481
 482          /* add shader lod bias */
 483          if (lod_bias) {
 484             lod_bias = LLVMBuildExtractElement(builder, lod_bias,
 485                                                index0, "");
 486             lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
 487          }
 488       }
 489
 490       /* add sampler lod bias */
 491       if (bld->static_state->lod_bias_non_zero)
 492          lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias");
 493
 494
 495       /* clamp lod */
 496       if (bld->static_state->apply_max_lod) {
 497          LLVMValueRef max_lod =
 498             bld->dynamic_state->max_lod(bld->dynamic_state, bld->gallivm, unit);
 499
 500          lod = lp_build_min(float_bld, lod, max_lod);
 501       }
 502       if (bld->static_state->apply_min_lod) {
 503          LLVMValueRef min_lod =
 504             bld->dynamic_state->min_lod(bld->dynamic_state, bld->gallivm, unit);
 505
 506          lod = lp_build_max(float_bld, lod, min_lod);
 507       }
 508    }
 509
 510    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
 511       if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
 512          lp_build_brilinear_lod(float_bld, lod, BRILINEAR_FACTOR,
 513                                 out_lod_ipart, out_lod_fpart);
 514       }
 515       else {
 516          lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, out_lod_fpart);
 517       }
 518
 519       lp_build_name(*out_lod_fpart, "lod_fpart");
 520    }
 521    else {
 522       *out_lod_ipart = lp_build_iround(float_bld, lod);
 523    }
 524
 525    lp_build_name(*out_lod_ipart, "lod_ipart");
 526
 527    return;
 528 }
 529
 530
 531 /**
 532  * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
 533  * mipmap level index.
 534  * Note: this is all scalar code.
 535  * \param lod  scalar float texture level of detail
 536  * \param level_out  returns integer
 537  */
 538 void
 539 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
 540                            unsigned unit,
 541                            LLVMValueRef lod_ipart,
 542                            LLVMValueRef *level_out)
 543 {
 544    struct lp_build_context *int_bld = &bld->int_bld;
 545    LLVMValueRef last_level, level;
 546
 547    LLVMValueRef zero = lp_build_const_int32(bld->gallivm, 0);
 548
 549    last_level = bld->dynamic_state->last_level(bld->dynamic_state,
 550                                                bld->gallivm, unit);
 551
 552    /* convert float lod to integer */
 553    level = lod_ipart;
 554
 555    /* clamp level to legal range of levels */
 556    *level_out = lp_build_clamp(int_bld, level, zero, last_level);
 557 }
 558
 559
 560 /**
 561  * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
 562  * two (adjacent) mipmap level indexes.  Later, we'll sample from those
 563  * two mipmap levels and interpolate between them.
 564  */
 565 void
 566 lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
 567                            unsigned unit,
 568                            LLVMValueRef lod_ipart,
 569                            LLVMValueRef *lod_fpart_inout,
 570                            LLVMValueRef *level0_out,
 571                            LLVMValueRef *level1_out)
 572 {
 573    LLVMBuilderRef builder = bld->gallivm->builder;
 574    struct lp_build_context *int_bld = &bld->int_bld;
 575    struct lp_build_context *float_bld = &bld->float_bld;
 576    LLVMValueRef last_level;
 577    LLVMValueRef clamp_min;
 578    LLVMValueRef clamp_max;
 579
 580    *level0_out = lod_ipart;
 581    *level1_out = lp_build_add(int_bld, lod_ipart, int_bld->one);
 582
 583    last_level = bld->dynamic_state->last_level(bld->dynamic_state,
 584                                                bld->gallivm, unit);
 585
 586    /*
 587     * Clamp both lod_ipart and lod_ipart + 1 to [0, last_level], with the
 588     * minimum number of comparisons, and zeroing lod_fpart in the extreme
 589     * ends in the process.
 590     */
 591
 592    /* lod_ipart < 0 */
 593    clamp_min = LLVMBuildICmp(builder, LLVMIntSLT,
 594                              lod_ipart, int_bld->zero,
 595                              "clamp_lod_to_zero");
 596
 597    *level0_out = LLVMBuildSelect(builder, clamp_min,
 598                                  int_bld->zero, *level0_out, "");
 599
 600    *level1_out = LLVMBuildSelect(builder, clamp_min,
 601                                  int_bld->zero, *level1_out, "");
 602
 603    *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min,
 604                                       float_bld->zero, *lod_fpart_inout, "");
 605
 606    /* lod_ipart >= last_level */
 607    clamp_max = LLVMBuildICmp(builder, LLVMIntSGE,
 608                              lod_ipart, last_level,
 609                              "clamp_lod_to_last");
 610
 611    *level0_out = LLVMBuildSelect(builder, clamp_max,
 612                                  last_level, *level0_out, "");
 613
 614    *level1_out = LLVMBuildSelect(builder, clamp_max,
 615                                  last_level, *level1_out, "");
 616
 617    *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max,
 618                                       float_bld->zero, *lod_fpart_inout, "");
 619
 620    lp_build_name(*level0_out, "sampler%u_miplevel0", unit);
 621    lp_build_name(*level1_out, "sampler%u_miplevel1", unit);
 622    lp_build_name(*lod_fpart_inout, "sampler%u_mipweight", unit);
 623 }
 624
 625
 626 /**
 627  * Return pointer to a single mipmap level.
 628  * \param data_array  array of pointers to mipmap levels
 629  * \param level  integer mipmap level
 630  */
 631 LLVMValueRef
 632 lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
 633                           LLVMValueRef level)
 634 {
 635    LLVMBuilderRef builder = bld->gallivm->builder;
 636    LLVMValueRef indexes[2], data_ptr;
 637
 638    indexes[0] = lp_build_const_int32(bld->gallivm, 0);
 639    indexes[1] = level;
 640    data_ptr = LLVMBuildGEP(builder, bld->data_array, indexes, 2, "");
 641    data_ptr = LLVMBuildLoad(builder, data_ptr, "");
 642    return data_ptr;
 643 }
 644
 645
 646 LLVMValueRef
 647 lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
 648                                 int level)
 649 {
 650    LLVMValueRef lvl = lp_build_const_int32(bld->gallivm, level);
 651    return lp_build_get_mipmap_level(bld, lvl);
 652 }
 653
 654
 655 /**
 656  * Codegen equivalent for u_minify().
 657  * Return max(1, base_size >> level);
 658  */
 659 static LLVMValueRef
 660 lp_build_minify(struct lp_build_context *bld,
 661                 LLVMValueRef base_size,
 662                 LLVMValueRef level)
 663 {
 664    LLVMBuilderRef builder = bld->gallivm->builder;
 665    assert(lp_check_value(bld->type, base_size));
 666    assert(lp_check_value(bld->type, level));
 667
 668    if (level == bld->zero) {
 669       /* if we're using mipmap level zero, no minification is needed */
 670       return base_size;
 671    }
 672    else {
 673       LLVMValueRef size =
 674          LLVMBuildLShr(builder, base_size, level, "minify");
 675       assert(bld->type.sign);
 676       size = lp_build_max(bld, size, bld->one);
 677       return size;
 678    }
 679 }
 680
 681
 682 /**
 683  * Dereference stride_array[mipmap_level] array to get a stride.
 684  * Return stride as a vector.
 685  */
 686 static LLVMValueRef
 687 lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
 688                               LLVMValueRef stride_array, LLVMValueRef level)
 689 {
 690    LLVMBuilderRef builder = bld->gallivm->builder;
 691    LLVMValueRef indexes[2], stride;
 692    indexes[0] = lp_build_const_int32(bld->gallivm, 0);
 693    indexes[1] = level;
 694    stride = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
 695    stride = LLVMBuildLoad(builder, stride, "");
 696    stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
 697    return stride;
 698 }
 699
 700
 701 /**
 702  * When sampling a mipmap, we need to compute the width, height, depth
 703  * of the source levels from the level indexes.  This helper function
 704  * does that.
 705  */
 706 void
 707 lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
 708                             LLVMValueRef ilevel,
 709                             LLVMValueRef *out_size,
 710                             LLVMValueRef *row_stride_vec,
 711                             LLVMValueRef *img_stride_vec)
 712 {
 713    const unsigned dims = bld->dims;
 714    LLVMValueRef ilevel_vec;
 715
 716    ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
 717
 718    /*
 719     * Compute width, height, depth at mipmap level 'ilevel'
 720     */
 721    *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
 722
 723    if (dims >= 2) {
 724       *row_stride_vec = lp_build_get_level_stride_vec(bld,
 725                                                       bld->row_stride_array,
 726                                                       ilevel);
 727       if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
 728          *img_stride_vec = lp_build_get_level_stride_vec(bld,
 729                                                          bld->img_stride_array,
 730                                                          ilevel);
 731       }
 732    }
 733 }
 734
 735
 736 /**
 737  * Extract and broadcast texture size.
 738  *
 739  * @param size_type   type of the texture size vector (either
 740  *                    bld->int_size_type or bld->float_size_type)
 741  * @param coord_type  type of the texture size vector (either
 742  *                    bld->int_coord_type or bld->coord_type)
 743  * @param int_size    vector with the integer texture size (width, height,
 744  *                    depth)
 745  */
 746 void
 747 lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
 748                              struct lp_type size_type,
 749                              struct lp_type coord_type,
 750                              LLVMValueRef size,
 751                              LLVMValueRef *out_width,
 752                              LLVMValueRef *out_height,
 753                              LLVMValueRef *out_depth)
 754 {
 755    const unsigned dims = bld->dims;
 756    LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
 757
 758    *out_width = lp_build_extract_broadcast(bld->gallivm,
 759                                            size_type,
 760                                            coord_type,
 761                                            size,
 762                                            LLVMConstInt(i32t, 0, 0));
 763    if (dims >= 2) {
 764       *out_height = lp_build_extract_broadcast(bld->gallivm,
 765                                                size_type,
 766                                                coord_type,
 767                                                size,
 768                                                LLVMConstInt(i32t, 1, 0));
 769       if (dims == 3) {
 770          *out_depth = lp_build_extract_broadcast(bld->gallivm,
 771                                                  size_type,
 772                                                  coord_type,
 773                                                  size,
 774                                                  LLVMConstInt(i32t, 2, 0));
 775       }
 776    }
 777 }
 778
 779
 780 /**
 781  * Unnormalize coords.
 782  *
 783  * @param int_size  vector with the integer texture size (width, height, depth)
 784  */
 785 void
 786 lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
 787                              LLVMValueRef flt_size,
 788                              LLVMValueRef *s,
 789                              LLVMValueRef *t,
 790                              LLVMValueRef *r)
 791 {
 792    const unsigned dims = bld->dims;
 793    LLVMValueRef width;
 794    LLVMValueRef height;
 795    LLVMValueRef depth;
 796
 797    lp_build_extract_image_sizes(bld,
 798                                 bld->float_size_type,
 799                                 bld->coord_type,
 800                                 flt_size,
 801                                 &width,
 802                                 &height,
 803                                 &depth);
 804
 805    /* s = s * width, t = t * height */
 806    *s = lp_build_mul(&bld->coord_bld, *s, width);
 807    if (dims >= 2) {
 808       *t = lp_build_mul(&bld->coord_bld, *t, height);
 809       if (dims >= 3) {
 810          *r = lp_build_mul(&bld->coord_bld, *r, depth);
 811       }
 812    }
 813 }
 814
 815
 816 /** Helper used by lp_build_cube_lookup() */
 817 static LLVMValueRef
 818 lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
 819 {
 820    /* ima = -0.5 / abs(coord); */
 821    LLVMValueRef negHalf = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, -0.5);
 822    LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
 823    LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord);
 824    return ima;
 825 }
 826
 827
 828 /**
 829  * Helper used by lp_build_cube_lookup()
 830  * \param sign  scalar +1 or -1
 831  * \param coord  float vector
 832  * \param ima  float vector
 833  */
 834 static LLVMValueRef
 835 lp_build_cube_coord(struct lp_build_context *coord_bld,
 836                     LLVMValueRef sign, int negate_coord,
 837                     LLVMValueRef coord, LLVMValueRef ima)
 838 {
 839    /* return negate(coord) * ima * sign + 0.5; */
 840    LLVMValueRef half = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, 0.5);
 841    LLVMValueRef res;
 842
 843    assert(negate_coord == +1 || negate_coord == -1);
 844
 845    if (negate_coord == -1) {
 846       coord = lp_build_negate(coord_bld, coord);
 847    }
 848
 849    res = lp_build_mul(coord_bld, coord, ima);
 850    if (sign) {
 851       sign = lp_build_broadcast_scalar(coord_bld, sign);
 852       res = lp_build_mul(coord_bld, res, sign);
 853    }
 854    res = lp_build_add(coord_bld, res, half);
 855
 856    return res;
 857 }
 858
 859
 860 /** Helper used by lp_build_cube_lookup()
 861  * Return (major_coord >= 0) ? pos_face : neg_face;
 862  */
 863 static LLVMValueRef
 864 lp_build_cube_face(struct lp_build_sample_context *bld,
 865                    LLVMValueRef major_coord,
 866                    unsigned pos_face, unsigned neg_face)
 867 {
 868    struct gallivm_state *gallivm = bld->gallivm;
 869    LLVMBuilderRef builder = gallivm->builder;
 870    LLVMValueRef cmp = LLVMBuildFCmp(builder, LLVMRealUGE,
 871                                     major_coord,
 872                                     bld->float_bld.zero, "");
 873    LLVMValueRef pos = lp_build_const_int32(gallivm, pos_face);
 874    LLVMValueRef neg = lp_build_const_int32(gallivm, neg_face);
 875    LLVMValueRef res = LLVMBuildSelect(builder, cmp, pos, neg, "");
 876    return res;
 877 }
 878
 879
 880
 881 /**
 882  * Generate code to do cube face selection and compute per-face texcoords.
 883  */
 884 void
 885 lp_build_cube_lookup(struct lp_build_sample_context *bld,
 886                      LLVMValueRef s,
 887                      LLVMValueRef t,
 888                      LLVMValueRef r,
 889                      LLVMValueRef *face,
 890                      LLVMValueRef *face_s,
 891                      LLVMValueRef *face_t)
 892 {
 893    struct lp_build_context *float_bld = &bld->float_bld;
 894    struct lp_build_context *coord_bld = &bld->coord_bld;
 895    LLVMBuilderRef builder = bld->gallivm->builder;
 896    LLVMValueRef rx, ry, rz;
 897    LLVMValueRef arx, ary, arz;
 898    LLVMValueRef c25 = lp_build_const_float(bld->gallivm, 0.25);
 899    LLVMValueRef arx_ge_ary, arx_ge_arz;
 900    LLVMValueRef ary_ge_arx, ary_ge_arz;
 901    LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;
 902    LLVMValueRef rx_pos, ry_pos, rz_pos;
 903
 904    assert(bld->coord_bld.type.length == 4);
 905
 906    /*
 907     * Use the average of the four pixel's texcoords to choose the face.
 908     */
 909    rx = lp_build_mul(float_bld, c25,
 910                      lp_build_sum_vector(&bld->coord_bld, s));
 911    ry = lp_build_mul(float_bld, c25,
 912                      lp_build_sum_vector(&bld->coord_bld, t));
 913    rz = lp_build_mul(float_bld, c25,
 914                      lp_build_sum_vector(&bld->coord_bld, r));
 915
 916    arx = lp_build_abs(float_bld, rx);
 917    ary = lp_build_abs(float_bld, ry);
 918    arz = lp_build_abs(float_bld, rz);
 919
 920    /*
 921     * Compare sign/magnitude of rx,ry,rz to determine face
 922     */
 923    arx_ge_ary = LLVMBuildFCmp(builder, LLVMRealUGE, arx, ary, "");
 924    arx_ge_arz = LLVMBuildFCmp(builder, LLVMRealUGE, arx, arz, "");
 925    ary_ge_arx = LLVMBuildFCmp(builder, LLVMRealUGE, ary, arx, "");
 926    ary_ge_arz = LLVMBuildFCmp(builder, LLVMRealUGE, ary, arz, "");
 927
 928    arx_ge_ary_arz = LLVMBuildAnd(builder, arx_ge_ary, arx_ge_arz, "");
 929    ary_ge_arx_arz = LLVMBuildAnd(builder, ary_ge_arx, ary_ge_arz, "");
 930
 931    rx_pos = LLVMBuildFCmp(builder, LLVMRealUGE, rx, float_bld->zero, "");
 932    ry_pos = LLVMBuildFCmp(builder, LLVMRealUGE, ry, float_bld->zero, "");
 933    rz_pos = LLVMBuildFCmp(builder, LLVMRealUGE, rz, float_bld->zero, "");
 934
 935    {
 936       struct lp_build_if_state if_ctx;
 937       LLVMValueRef face_s_var;
 938       LLVMValueRef face_t_var;
 939       LLVMValueRef face_var;
 940
 941       face_s_var = lp_build_alloca(bld->gallivm, bld->coord_bld.vec_type, "face_s_var");
 942       face_t_var = lp_build_alloca(bld->gallivm, bld->coord_bld.vec_type, "face_t_var");
 943       face_var = lp_build_alloca(bld->gallivm, bld->int_bld.vec_type, "face_var");
 944
 945       lp_build_if(&if_ctx, bld->gallivm, arx_ge_ary_arz);
 946       {
 947          /* +/- X face */
 948          LLVMValueRef sign = lp_build_sgn(float_bld, rx);
 949          LLVMValueRef ima = lp_build_cube_ima(coord_bld, s);
 950          *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
 951          *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
 952          *face = lp_build_cube_face(bld, rx,
 953                                     PIPE_TEX_FACE_POS_X,
 954                                     PIPE_TEX_FACE_NEG_X);
 955          LLVMBuildStore(builder, *face_s, face_s_var);
 956          LLVMBuildStore(builder, *face_t, face_t_var);
 957          LLVMBuildStore(builder, *face, face_var);
 958       }
 959       lp_build_else(&if_ctx);
 960       {
 961          struct lp_build_if_state if_ctx2;
 962
 963          ary_ge_arx_arz = LLVMBuildAnd(builder, ary_ge_arx, ary_ge_arz, "");
 964
 965          lp_build_if(&if_ctx2, bld->gallivm, ary_ge_arx_arz);
 966          {
 967             /* +/- Y face */
 968             LLVMValueRef sign = lp_build_sgn(float_bld, ry);
 969             LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
 970             *face_s = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
 971             *face_t = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
 972             *face = lp_build_cube_face(bld, ry,
 973                                        PIPE_TEX_FACE_POS_Y,
 974                                        PIPE_TEX_FACE_NEG_Y);
 975             LLVMBuildStore(builder, *face_s, face_s_var);
 976             LLVMBuildStore(builder, *face_t, face_t_var);
 977             LLVMBuildStore(builder, *face, face_var);
 978          }
 979          lp_build_else(&if_ctx2);
 980          {
 981             /* +/- Z face */
 982             LLVMValueRef sign = lp_build_sgn(float_bld, rz);
 983             LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
 984             *face_s = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
 985             *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
 986             *face = lp_build_cube_face(bld, rz,
 987                                        PIPE_TEX_FACE_POS_Z,
 988                                        PIPE_TEX_FACE_NEG_Z);
 989             LLVMBuildStore(builder, *face_s, face_s_var);
 990             LLVMBuildStore(builder, *face_t, face_t_var);
 991             LLVMBuildStore(builder, *face, face_var);
 992          }
 993          lp_build_endif(&if_ctx2);
 994       }
 995
 996       lp_build_endif(&if_ctx);
 997
 998       *face_s = LLVMBuildLoad(builder, face_s_var, "face_s");
 999       *face_t = LLVMBuildLoad(builder, face_t_var, "face_t");
1000       *face   = LLVMBuildLoad(builder, face_var, "face");
1001    }
1002 }
1003
1004
1005 /**
1006  * Compute the partial offset of a pixel block along an arbitrary axis.
1007  *
1008  * @param coord   coordinate in pixels
1009  * @param stride  number of bytes between rows of successive pixel blocks
1010  * @param block_length  number of pixels in a pixels block along the coordinate
1011  *                      axis
1012  * @param out_offset    resulting relative offset of the pixel block in bytes
1013  * @param out_subcoord  resulting sub-block pixel coordinate
1014  */
1015 void
1016 lp_build_sample_partial_offset(struct lp_build_context *bld,
1017                                unsigned block_length,
1018                                LLVMValueRef coord,
1019                                LLVMValueRef stride,
1020                                LLVMValueRef *out_offset,
1021                                LLVMValueRef *out_subcoord)
1022 {
1023    LLVMBuilderRef builder = bld->gallivm->builder;
1024    LLVMValueRef offset;
1025    LLVMValueRef subcoord;
1026
1027    if (block_length == 1) {
1028       subcoord = bld->zero;
1029    }
1030    else {
1031       /*
1032        * Pixel blocks have power of two dimensions. LLVM should convert the
1033        * rem/div to bit arithmetic.
1034        * TODO: Verify this.
1035        * It does indeed BUT it does transform it to scalar (and back) when doing so
1036        * (using roughly extract, shift/and, mov, unpack) (llvm 2.7).
1037        * The generated code looks seriously unfunny and is quite expensive.
1038        */
1039 #if 0
1040       LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
1041       subcoord = LLVMBuildURem(builder, coord, block_width, "");
1042       coord    = LLVMBuildUDiv(builder, coord, block_width, "");
1043 #else
1044       unsigned logbase2 = util_unsigned_logbase2(block_length);
1045       LLVMValueRef block_shift = lp_build_const_int_vec(bld->gallivm, bld->type, logbase2);
1046       LLVMValueRef block_mask = lp_build_const_int_vec(bld->gallivm, bld->type, block_length - 1);
1047       subcoord = LLVMBuildAnd(builder, coord, block_mask, "");
1048       coord = LLVMBuildLShr(builder, coord, block_shift, "");
1049 #endif
1050    }
1051
1052    offset = lp_build_mul(bld, coord, stride);
1053
1054    assert(out_offset);
1055    assert(out_subcoord);
1056
1057    *out_offset = offset;
1058    *out_subcoord = subcoord;
1059 }
1060
1061
1062 /**
1063  * Compute the offset of a pixel block.
1064  *
1065  * x, y, z, y_stride, z_stride are vectors, and they refer to pixels.
1066  *
1067  * Returns the relative offset and i,j sub-block coordinates
1068  */
1069 void
1070 lp_build_sample_offset(struct lp_build_context *bld,
1071                        const struct util_format_description *format_desc,
1072                        LLVMValueRef x,
1073                        LLVMValueRef y,
1074                        LLVMValueRef z,
1075                        LLVMValueRef y_stride,
1076                        LLVMValueRef z_stride,
1077                        LLVMValueRef *out_offset,
1078                        LLVMValueRef *out_i,
1079                        LLVMValueRef *out_j)
1080 {
1081    LLVMValueRef x_stride;
1082    LLVMValueRef offset;
1083
1084    x_stride = lp_build_const_vec(bld->gallivm, bld->type,
1085                                  format_desc->block.bits/8);
1086
1087    lp_build_sample_partial_offset(bld,
1088                                   format_desc->block.width,
1089                                   x, x_stride,
1090                                   &offset, out_i);
1091
1092    if (y && y_stride) {
1093       LLVMValueRef y_offset;
1094       lp_build_sample_partial_offset(bld,
1095                                      format_desc->block.height,
1096                                      y, y_stride,
1097                                      &y_offset, out_j);
1098       offset = lp_build_add(bld, offset, y_offset);
1099    }
1100    else {
1101       *out_j = bld->zero;
1102    }
1103
1104    if (z && z_stride) {
1105       LLVMValueRef z_offset;
1106       LLVMValueRef k;
1107       lp_build_sample_partial_offset(bld,
1108                                      1, /* pixel blocks are always 2D */
1109                                      z, z_stride,
1110                                      &z_offset, &k);
1111       offset = lp_build_add(bld, offset, z_offset);
1112    }
1113
1114    *out_offset = offset;
1115 }