src/gallium/drivers/softpipe/sp_tex_sample.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  * Copyright 2008 VMware, Inc.  All rights reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the
   9  * "Software"), to deal in the Software without restriction, including
  10  * without limitation the rights to use, copy, modify, merge, publish,
  11  * distribute, sub license, and/or sell copies of the Software, and to
  12  * permit persons to whom the Software is furnished to do so, subject to
  13  * the following conditions:
  14  *
  15  * The above copyright notice and this permission notice (including the
  16  * next paragraph) shall be included in all copies or substantial portions
  17  * of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26  *
  27  **************************************************************************/
  28
  29 /**
  30  * Texture sampling
  31  *
  32  * Authors:
  33  *   Brian Paul
  34  */
  35
  36 #include "sp_context.h"
  37 #include "sp_quad.h"
  38 #include "sp_surface.h"
  39 #include "sp_texture.h"
  40 #include "sp_tex_sample.h"
  41 #include "sp_tile_cache.h"
  42 #include "pipe/p_context.h"
  43 #include "pipe/p_defines.h"
  44 #include "util/u_math.h"
  45 #include "util/u_memory.h"
  46
  47
  48
/*
 * FRAC(f) expands to f minus util_ifloor(f).
 *
 * Note, the FRAC macro has to work perfectly.  Otherwise you'll sometimes
 * see 1-pixel bands of improperly weighted linear-filtered textures.
 * The tests/texwrap.c demo is a good test.
 *
 * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0.
 * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x).
 *
 * The argument appears twice in the expansion, so it must not have side
 * effects.
 */
#define FRAC(f)  ((f) - util_ifloor(f))
  57
  58
  59 /**
  60  * Linear interpolation macro
  61  */
  62 static INLINE float
  63 lerp(float a, float v0, float v1)
  64 {
  65    return v0 + a * (v1 - v0);
  66 }
  67
  68
  69 /**
  70  * Do 2D/biliner interpolation of float values.
  71  * v00, v10, v01 and v11 are typically four texture samples in a square/box.
  72  * a and b are the horizontal and vertical interpolants.
  73  * It's important that this function is inlined when compiled with
  74  * optimization!  If we find that's not true on some systems, convert
  75  * to a macro.
  76  */
  77 static INLINE float
  78 lerp_2d(float a, float b,
  79         float v00, float v10, float v01, float v11)
  80 {
  81    const float temp0 = lerp(a, v00, v10);
  82    const float temp1 = lerp(a, v01, v11);
  83    return lerp(b, temp0, temp1);
  84 }
  85
  86
  87 /**
  88  * As above, but 3D interpolation of 8 values.
  89  */
  90 static INLINE float
  91 lerp_3d(float a, float b, float c,
  92         float v000, float v100, float v010, float v110,
  93         float v001, float v101, float v011, float v111)
  94 {
  95    const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
  96    const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
  97    return lerp(c, temp0, temp1);
  98 }
  99
 100
 101
 102 /**
 103  * If A is a signed integer, A % B doesn't give the right value for A < 0
 104  * (in terms of texture repeat).  Just casting to unsigned fixes that.
 105  */
 106 #define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B))
 107
 108
 109 /**
 110  * Apply texture coord wrapping mode and return integer texture indexes
 111  * for a vector of four texcoords (S or T or P).
 112  * \param wrapMode  PIPE_TEX_WRAP_x
 113  * \param s  the incoming texcoords
 114  * \param size  the texture image size
 115  * \param icoord  returns the integer texcoords
 116  * \return  integer texture index
 117  */
 118 static INLINE void
 119 nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
 120                    int icoord[4])
 121 {
 122    uint ch;
 123    switch (wrapMode) {
 124    case PIPE_TEX_WRAP_REPEAT:
 125       /* s limited to [0,1) */
 126       /* i limited to [0,size-1] */
 127       for (ch = 0; ch < 4; ch++) {
 128          int i = util_ifloor(s[ch] * size);
 129          icoord[ch] = REMAINDER(i, size);
 130       }
 131       return;
 132    case PIPE_TEX_WRAP_CLAMP:
 133       /* s limited to [0,1] */
 134       /* i limited to [0,size-1] */
 135       for (ch = 0; ch < 4; ch++) {
 136          if (s[ch] <= 0.0F)
 137             icoord[ch] = 0;
 138          else if (s[ch] >= 1.0F)
 139             icoord[ch] = size - 1;
 140          else
 141             icoord[ch] = util_ifloor(s[ch] * size);
 142       }
 143       return;
 144    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 145       {
 146          /* s limited to [min,max] */
 147          /* i limited to [0, size-1] */
 148          const float min = 1.0F / (2.0F * size);
 149          const float max = 1.0F - min;
 150          for (ch = 0; ch < 4; ch++) {
 151             if (s[ch] < min)
 152                icoord[ch] = 0;
 153             else if (s[ch] > max)
 154                icoord[ch] = size - 1;
 155             else
 156                icoord[ch] = util_ifloor(s[ch] * size);
 157          }
 158       }
 159       return;
 160    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 161       {
 162          /* s limited to [min,max] */
 163          /* i limited to [-1, size] */
 164          const float min = -1.0F / (2.0F * size);
 165          const float max = 1.0F - min;
 166          for (ch = 0; ch < 4; ch++) {
 167             if (s[ch] <= min)
 168                icoord[ch] = -1;
 169             else if (s[ch] >= max)
 170                icoord[ch] = size;
 171             else
 172                icoord[ch] = util_ifloor(s[ch] * size);
 173          }
 174       }
 175       return;
 176    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 177       {
 178          const float min = 1.0F / (2.0F * size);
 179          const float max = 1.0F - min;
 180          for (ch = 0; ch < 4; ch++) {
 181             const int flr = util_ifloor(s[ch]);
 182             float u;
 183             if (flr & 1)
 184                u = 1.0F - (s[ch] - (float) flr);
 185             else
 186                u = s[ch] - (float) flr;
 187             if (u < min)
 188                icoord[ch] = 0;
 189             else if (u > max)
 190                icoord[ch] = size - 1;
 191             else
 192                icoord[ch] = util_ifloor(u * size);
 193          }
 194       }
 195       return;
 196    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 197       for (ch = 0; ch < 4; ch++) {
 198          /* s limited to [0,1] */
 199          /* i limited to [0,size-1] */
 200          const float u = fabsf(s[ch]);
 201          if (u <= 0.0F)
 202             icoord[ch] = 0;
 203          else if (u >= 1.0F)
 204             icoord[ch] = size - 1;
 205          else
 206             icoord[ch] = util_ifloor(u * size);
 207       }
 208       return;
 209    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 210       {
 211          /* s limited to [min,max] */
 212          /* i limited to [0, size-1] */
 213          const float min = 1.0F / (2.0F * size);
 214          const float max = 1.0F - min;
 215          for (ch = 0; ch < 4; ch++) {
 216             const float u = fabsf(s[ch]);
 217             if (u < min)
 218                icoord[ch] = 0;
 219             else if (u > max)
 220                icoord[ch] = size - 1;
 221             else
 222                icoord[ch] = util_ifloor(u * size);
 223          }
 224       }
 225       return;
 226    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 227       {
 228          /* s limited to [min,max] */
 229          /* i limited to [0, size-1] */
 230          const float min = -1.0F / (2.0F * size);
 231          const float max = 1.0F - min;
 232          for (ch = 0; ch < 4; ch++) {
 233             const float u = fabsf(s[ch]);
 234             if (u < min)
 235                icoord[ch] = -1;
 236             else if (u > max)
 237                icoord[ch] = size;
 238             else
 239                icoord[ch] = util_ifloor(u * size);
 240          }
 241       }
 242       return;
 243    default:
 244       assert(0);
 245    }
 246 }
 247
 248
 249 /**
 250  * Used to compute texel locations for linear sampling for four texcoords.
 251  * \param wrapMode  PIPE_TEX_WRAP_x
 252  * \param s  the texcoords
 253  * \param size  the texture image size
 254  * \param icoord0  returns first texture indexes
 255  * \param icoord1  returns second texture indexes (usually icoord0 + 1)
 256  * \param w  returns blend factor/weight between texture indexes
 257  * \param icoord  returns the computed integer texture coords
 258  */
 259 static INLINE void
 260 linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
 261                   int icoord0[4], int icoord1[4], float w[4])
 262 {
 263    uint ch;
 264
 265    switch (wrapMode) {
 266    case PIPE_TEX_WRAP_REPEAT:
 267       for (ch = 0; ch < 4; ch++) {
 268          float u = s[ch] * size - 0.5F;
 269          icoord0[ch] = REMAINDER(util_ifloor(u), size);
 270          icoord1[ch] = REMAINDER(icoord0[ch] + 1, size);
 271          w[ch] = FRAC(u);
 272       }
 273       break;;
 274    case PIPE_TEX_WRAP_CLAMP:
 275       for (ch = 0; ch < 4; ch++) {
 276          float u = CLAMP(s[ch], 0.0F, 1.0F);
 277          u = u * size - 0.5f;
 278          icoord0[ch] = util_ifloor(u);
 279          icoord1[ch] = icoord0[ch] + 1;
 280          w[ch] = FRAC(u);
 281       }
 282       break;;
 283    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 284       for (ch = 0; ch < 4; ch++) {
 285          float u = CLAMP(s[ch], 0.0F, 1.0F);
 286          u = u * size - 0.5f;
 287          icoord0[ch] = util_ifloor(u);
 288          icoord1[ch] = icoord0[ch] + 1;
 289          if (icoord0[ch] < 0)
 290             icoord0[ch] = 0;
 291          if (icoord1[ch] >= (int) size)
 292             icoord1[ch] = size - 1;
 293          w[ch] = FRAC(u);
 294       }
 295       break;;
 296    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 297       {
 298          const float min = -1.0F / (2.0F * size);
 299          const float max = 1.0F - min;
 300          for (ch = 0; ch < 4; ch++) {
 301             float u = CLAMP(s[ch], min, max);
 302             u = u * size - 0.5f;
 303             icoord0[ch] = util_ifloor(u);
 304             icoord1[ch] = icoord0[ch] + 1;
 305             w[ch] = FRAC(u);
 306          }
 307       }
 308       break;;
 309    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 310       for (ch = 0; ch < 4; ch++) {
 311          const int flr = util_ifloor(s[ch]);
 312          float u;
 313          if (flr & 1)
 314             u = 1.0F - (s[ch] - (float) flr);
 315          else
 316             u = s[ch] - (float) flr;
 317          u = u * size - 0.5F;
 318          icoord0[ch] = util_ifloor(u);
 319          icoord1[ch] = icoord0[ch] + 1;
 320          if (icoord0[ch] < 0)
 321             icoord0[ch] = 0;
 322          if (icoord1[ch] >= (int) size)
 323             icoord1[ch] = size - 1;
 324          w[ch] = FRAC(u);
 325       }
 326       break;;
 327    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 328       for (ch = 0; ch < 4; ch++) {
 329          float u = fabsf(s[ch]);
 330          if (u >= 1.0F)
 331             u = (float) size;
 332          else
 333             u *= size;
 334          u -= 0.5F;
 335          icoord0[ch] = util_ifloor(u);
 336          icoord1[ch] = icoord0[ch] + 1;
 337          w[ch] = FRAC(u);
 338       }
 339       break;;
 340    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 341       for (ch = 0; ch < 4; ch++) {
 342          float u = fabsf(s[ch]);
 343          if (u >= 1.0F)
 344             u = (float) size;
 345          else
 346             u *= size;
 347          u -= 0.5F;
 348          icoord0[ch] = util_ifloor(u);
 349          icoord1[ch] = icoord0[ch] + 1;
 350          if (icoord0[ch] < 0)
 351             icoord0[ch] = 0;
 352          if (icoord1[ch] >= (int) size)
 353             icoord1[ch] = size - 1;
 354          w[ch] = FRAC(u);
 355       }
 356       break;;
 357    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 358       {
 359          const float min = -1.0F / (2.0F * size);
 360          const float max = 1.0F - min;
 361          for (ch = 0; ch < 4; ch++) {
 362             float u = fabsf(s[ch]);
 363             if (u <= min)
 364                u = min * size;
 365             else if (u >= max)
 366                u = max * size;
 367             else
 368                u *= size;
 369             u -= 0.5F;
 370             icoord0[ch] = util_ifloor(u);
 371             icoord1[ch] = icoord0[ch] + 1;
 372             w[ch] = FRAC(u);
 373          }
 374       }
 375       break;;
 376    default:
 377       assert(0);
 378    }
 379 }
 380
 381
 382 /**
 383  * For RECT textures / unnormalized texcoords
 384  * Only a subset of wrap modes supported.
 385  */
 386 static INLINE void
 387 nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
 388                           int icoord[4])
 389 {
 390    uint ch;
 391    switch (wrapMode) {
 392    case PIPE_TEX_WRAP_CLAMP:
 393       for (ch = 0; ch < 4; ch++) {
 394          int i = util_ifloor(s[ch]);
 395          icoord[ch]= CLAMP(i, 0, (int) size-1);
 396       }
 397       return;
 398    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 399       /* fall-through */
 400    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 401       for (ch = 0; ch < 4; ch++) {
 402          icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) );
 403       }
 404       return;
 405    default:
 406       assert(0);
 407    }
 408 }
 409
 410
 411 /**
 412  * For RECT textures / unnormalized texcoords.
 413  * Only a subset of wrap modes supported.
 414  */
 415 static INLINE void
 416 linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
 417                          int icoord0[4], int icoord1[4], float w[4])
 418 {
 419    uint ch;
 420    switch (wrapMode) {
 421    case PIPE_TEX_WRAP_CLAMP:
 422       for (ch = 0; ch < 4; ch++) {
 423          /* Not exactly what the spec says, but it matches NVIDIA output */
 424          float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f);
 425          icoord0[ch] = util_ifloor(u);
 426          icoord1[ch] = icoord0[ch] + 1;
 427          w[ch] = FRAC(u);
 428       }
 429       return;
 430    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 431       /* fall-through */
 432    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 433       for (ch = 0; ch < 4; ch++) {
 434          float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F);
 435          u -= 0.5F;
 436          icoord0[ch] = util_ifloor(u);
 437          icoord1[ch] = icoord0[ch] + 1;
 438          if (icoord1[ch] > (int) size - 1)
 439             icoord1[ch] = size - 1;
 440          w[ch] = FRAC(u);
 441       }
 442       break;
 443    default:
 444       assert(0);
 445    }
 446 }
 447
 448
 449 static unsigned
 450 choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
 451 {
 452    /*
 453       major axis
 454       direction     target                             sc     tc    ma
 455       ----------    -------------------------------    ---    ---   ---
 456        +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
 457        -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
 458        +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
 459        -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
 460        +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
 461        -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
 462    */
 463    const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
 464    unsigned face;
 465    float sc, tc, ma;
 466
 467    if (arx > ary && arx > arz) {
 468       if (rx >= 0.0F) {
 469          face = PIPE_TEX_FACE_POS_X;
 470          sc = -rz;
 471          tc = -ry;
 472          ma = arx;
 473       }
 474       else {
 475          face = PIPE_TEX_FACE_NEG_X;
 476          sc = rz;
 477          tc = -ry;
 478          ma = arx;
 479       }
 480    }
 481    else if (ary > arx && ary > arz) {
 482       if (ry >= 0.0F) {
 483          face = PIPE_TEX_FACE_POS_Y;
 484          sc = rx;
 485          tc = rz;
 486          ma = ary;
 487       }
 488       else {
 489          face = PIPE_TEX_FACE_NEG_Y;
 490          sc = rx;
 491          tc = -rz;
 492          ma = ary;
 493       }
 494    }
 495    else {
 496       if (rz > 0.0F) {
 497          face = PIPE_TEX_FACE_POS_Z;
 498          sc = rx;
 499          tc = -ry;
 500          ma = arz;
 501       }
 502       else {
 503          face = PIPE_TEX_FACE_NEG_Z;
 504          sc = -rx;
 505          tc = -ry;
 506          ma = arz;
 507       }
 508    }
 509
 510    *newS = ( sc / ma + 1.0F ) * 0.5F;
 511    *newT = ( tc / ma + 1.0F ) * 0.5F;
 512
 513    return face;
 514 }
 515
 516
 517 /**
 518  * Examine the quad's texture coordinates to compute the partial
 519  * derivatives w.r.t X and Y, then compute lambda (level of detail).
 520  *
 521  * This is only done for fragment shaders, not vertex shaders.
 522  */
 523 static float
 524 compute_lambda(const struct pipe_texture *tex,
 525                const struct pipe_sampler_state *sampler,
 526                const float s[QUAD_SIZE],
 527                const float t[QUAD_SIZE],
 528                const float p[QUAD_SIZE],
 529                float lodbias)
 530 {
 531    float rho, lambda;
 532
 533    assert(sampler->normalized_coords);
 534
 535    assert(s);
 536    {
 537       float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT];
 538       float dsdy = s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT];
 539       dsdx = fabsf(dsdx);
 540       dsdy = fabsf(dsdy);
 541       rho = MAX2(dsdx, dsdy) * tex->width[0];
 542    }
 543    if (t) {
 544       float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT];
 545       float dtdy = t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT];
 546       float max;
 547       dtdx = fabsf(dtdx);
 548       dtdy = fabsf(dtdy);
 549       max = MAX2(dtdx, dtdy) * tex->height[0];
 550       rho = MAX2(rho, max);
 551    }
 552    if (p) {
 553       float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT];
 554       float dpdy = p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT];
 555       float max;
 556       dpdx = fabsf(dpdx);
 557       dpdy = fabsf(dpdy);
 558       max = MAX2(dpdx, dpdy) * tex->depth[0];
 559       rho = MAX2(rho, max);
 560    }
 561
 562    lambda = util_fast_log2(rho);
 563    lambda += lodbias + sampler->lod_bias;
 564    lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
 565
 566    return lambda;
 567 }
 568
 569
 570 /**
 571  * Do several things here:
 572  * 1. Compute lambda from the texcoords, if needed
 573  * 2. Determine if we're minifying or magnifying
 574  * 3. If minifying, choose mipmap levels
 575  * 4. Return image filter to use within mipmap images
 576  * \param level0  Returns first mipmap level to sample from
 577  * \param level1  Returns second mipmap level to sample from
 578  * \param levelBlend  Returns blend factor between levels, in [0,1]
 579  * \param imgFilter  Returns either the min or mag filter, depending on lambda
 580  */
 581 static void
 582 choose_mipmap_levels(const struct pipe_texture *texture,
 583                      const struct pipe_sampler_state *sampler,
 584                      const float s[QUAD_SIZE],
 585                      const float t[QUAD_SIZE],
 586                      const float p[QUAD_SIZE],
 587                      boolean computeLambda,
 588                      float lodbias,
 589                      unsigned *level0, unsigned *level1, float *levelBlend,
 590                      unsigned *imgFilter)
 591 {
 592    if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
 593       /* no mipmap selection needed */
 594       *level0 = *level1 = CLAMP((int) sampler->min_lod,
 595                                 0, (int) texture->last_level);
 596
 597       if (sampler->min_img_filter != sampler->mag_img_filter) {
 598          /* non-mipmapped texture, but still need to determine if doing
 599           * minification or magnification.
 600           */
 601          float lambda = compute_lambda(texture, sampler, s, t, p, lodbias);
 602          if (lambda <= 0.0) {
 603             *imgFilter = sampler->mag_img_filter;
 604          }
 605          else {
 606             *imgFilter = sampler->min_img_filter;
 607          }
 608       }
 609       else {
 610          *imgFilter = sampler->mag_img_filter;
 611       }
 612    }
 613    else {
 614       float lambda;
 615
 616       if (computeLambda)
 617          /* fragment shader */
 618          lambda = compute_lambda(texture, sampler, s, t, p, lodbias);
 619       else
 620          /* vertex shader */
 621          lambda = lodbias; /* not really a bias, but absolute LOD */
 622
 623       if (lambda <= 0.0) { /* XXX threshold depends on the filter */
 624          /* magnifying */
 625          *imgFilter = sampler->mag_img_filter;
 626          *level0 = *level1 = 0;
 627       }
 628       else {
 629          /* minifying */
 630          *imgFilter = sampler->min_img_filter;
 631
 632          /* choose mipmap level(s) and compute the blend factor between them */
 633          if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
 634             /* Nearest mipmap level */
 635             const int lvl = (int) (lambda + 0.5);
 636             *level0 =
 637             *level1 = CLAMP(lvl, 0, (int) texture->last_level);
 638          }
 639          else {
 640             /* Linear interpolation between mipmap levels */
 641             const int lvl = (int) lambda;
 642             *level0 = CLAMP(lvl,     0, (int) texture->last_level);
 643             *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level);
 644             *levelBlend = FRAC(lambda);  /* blending weight between levels */
 645          }
 646       }
 647    }
 648 }
 649
 650
 651 /**
 652  * Get a texel from a texture, using the texture tile cache.
 653  *
 654  * \param face  the cube face in 0..5
 655  * \param level  the mipmap level
 656  * \param x  the x coord of texel within 2D image
 657  * \param y  the y coord of texel within 2D image
 658  * \param z  which slice of a 3D texture
 659  * \param rgba  the quad to put the texel/color into
 660  * \param j  which element of the rgba quad to write to
 661  *
 662  * XXX maybe move this into sp_tile_cache.c and merge with the
 663  * sp_get_cached_tile_tex() function.  Also, get 4 texels instead of 1...
 664  */
 665 static void
 666 get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler,
 667                   unsigned face, unsigned level, int x, int y,
 668                   const float *out[4])
 669 {
 670    const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
 671
 672    const struct softpipe_cached_tile *tile
 673       = sp_get_cached_tile_tex(samp->cache,
 674                                tile_address(x, y, 0, face, level));
 675
 676    y %= TILE_SIZE;
 677    x %= TILE_SIZE;
 678
 679    out[0] = &tile->data.color[y  ][x  ][0];
 680    out[1] = &tile->data.color[y  ][x+1][0];
 681    out[2] = &tile->data.color[y+1][x  ][0];
 682    out[3] = &tile->data.color[y+1][x+1][0];
 683 }
 684
 685 static INLINE const float *
 686 get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler,
 687                  unsigned face, unsigned level, int x, int y)
 688 {
 689    const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
 690
 691    const struct softpipe_cached_tile *tile
 692       = sp_get_cached_tile_tex(samp->cache,
 693                                tile_address(x, y, 0, face, level));
 694
 695    y %= TILE_SIZE;
 696    x %= TILE_SIZE;
 697
 698    return &tile->data.color[y][x][0];
 699 }
 700
 701
 702 static void
 703 get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler,
 704                      unsigned face, unsigned level,
 705                      int x0, int y0,
 706                      int x1, int y1,
 707                      const float *out[4])
 708 {
 709    unsigned i;
 710
 711    for (i = 0; i < 4; i++) {
 712       unsigned tx = (i & 1) ? x1 : x0;
 713       unsigned ty = (i >> 1) ? y1 : y0;
 714
 715       out[i] = get_texel_2d_ptr( tgsi_sampler, face, level, tx, ty );
 716    }
 717 }
 718
 719 static void
 720 get_texel(const struct tgsi_sampler *tgsi_sampler,
 721                  unsigned face, unsigned level, int x, int y, int z,
 722                  float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j)
 723 {
 724    const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
 725    const struct pipe_texture *texture = samp->texture;
 726    const struct pipe_sampler_state *sampler = samp->sampler;
 727
 728    if (x < 0 || x >= (int) texture->width[level] ||
 729        y < 0 || y >= (int) texture->height[level] ||
 730        z < 0 || z >= (int) texture->depth[level]) {
 731       rgba[0][j] = sampler->border_color[0];
 732       rgba[1][j] = sampler->border_color[1];
 733       rgba[2][j] = sampler->border_color[2];
 734       rgba[3][j] = sampler->border_color[3];
 735    }
 736    else {
 737       const unsigned tx = x % TILE_SIZE;
 738       const unsigned ty = y % TILE_SIZE;
 739       const struct softpipe_cached_tile *tile;
 740
 741       tile = sp_get_cached_tile_tex(samp->cache,
 742                                     tile_address(x, y, z, face, level));
 743
 744       rgba[0][j] = tile->data.color[ty][tx][0];
 745       rgba[1][j] = tile->data.color[ty][tx][1];
 746       rgba[2][j] = tile->data.color[ty][tx][2];
 747       rgba[3][j] = tile->data.color[ty][tx][3];
 748       if (0)
 749       {
 750          debug_printf("Get texel %f %f %f %f from %s\n",
 751                       rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j],
 752                       pf_name(texture->format));
 753       }
 754    }
 755 }
 756
 757
 758 /**
 759  * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
 760  * When we sampled the depth texture, the depth value was put into all
 761  * RGBA channels.  We look at the red channel here.
 762  * \param rgba  quad of (depth) texel values
 763  * \param p  texture 'P' components for four pixels in quad
 764  * \param j  which pixel in the quad to test [0..3]
 765  */
 766 static INLINE void
 767 shadow_compare(const struct pipe_sampler_state *sampler,
 768                float rgba[NUM_CHANNELS][QUAD_SIZE],
 769                const float p[QUAD_SIZE],
 770                uint j)
 771 {
 772    int k;
 773    switch (sampler->compare_func) {
 774    case PIPE_FUNC_LESS:
 775       k = p[j] < rgba[0][j];
 776       break;
 777    case PIPE_FUNC_LEQUAL:
 778       k = p[j] <= rgba[0][j];
 779       break;
 780    case PIPE_FUNC_GREATER:
 781       k = p[j] > rgba[0][j];
 782       break;
 783    case PIPE_FUNC_GEQUAL:
 784       k = p[j] >= rgba[0][j];
 785       break;
 786    case PIPE_FUNC_EQUAL:
 787       k = p[j] == rgba[0][j];
 788       break;
 789    case PIPE_FUNC_NOTEQUAL:
 790       k = p[j] != rgba[0][j];
 791       break;
 792    case PIPE_FUNC_ALWAYS:
 793       k = 1;
 794       break;
 795    case PIPE_FUNC_NEVER:
 796       k = 0;
 797       break;
 798    default:
 799       k = 0;
 800       assert(0);
 801       break;
 802    }
 803
 804    /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
 805    rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k;
 806    rgba[3][j] = 1.0F;
 807 }
 808
 809
 810 /**
 811  * As above, but do four z/texture comparisons.
 812  */
 813 static INLINE void
 814 shadow_compare4(const struct pipe_sampler_state *sampler,
 815                 float rgba[NUM_CHANNELS][QUAD_SIZE],
 816                 const float p[QUAD_SIZE])
 817 {
 818    int j, k0, k1, k2, k3;
 819    float val;
 820
 821    /* compare four texcoords vs. four texture samples */
 822    switch (sampler->compare_func) {
 823    case PIPE_FUNC_LESS:
 824       k0 = p[0] < rgba[0][0];
 825       k1 = p[1] < rgba[0][1];
 826       k2 = p[2] < rgba[0][2];
 827       k3 = p[3] < rgba[0][3];
 828       break;
 829    case PIPE_FUNC_LEQUAL:
 830       k0 = p[0] <= rgba[0][0];
 831       k1 = p[1] <= rgba[0][1];
 832       k2 = p[2] <= rgba[0][2];
 833       k3 = p[3] <= rgba[0][3];
 834       break;
 835    case PIPE_FUNC_GREATER:
 836       k0 = p[0] > rgba[0][0];
 837       k1 = p[1] > rgba[0][1];
 838       k2 = p[2] > rgba[0][2];
 839       k3 = p[3] > rgba[0][3];
 840       break;
 841    case PIPE_FUNC_GEQUAL:
 842       k0 = p[0] >= rgba[0][0];
 843       k1 = p[1] >= rgba[0][1];
 844       k2 = p[2] >= rgba[0][2];
 845       k3 = p[3] >= rgba[0][3];
 846       break;
 847    case PIPE_FUNC_EQUAL:
 848       k0 = p[0] == rgba[0][0];
 849       k1 = p[1] == rgba[0][1];
 850       k2 = p[2] == rgba[0][2];
 851       k3 = p[3] == rgba[0][3];
 852       break;
 853    case PIPE_FUNC_NOTEQUAL:
 854       k0 = p[0] != rgba[0][0];
 855       k1 = p[1] != rgba[0][1];
 856       k2 = p[2] != rgba[0][2];
 857       k3 = p[3] != rgba[0][3];
 858       break;
 859    case PIPE_FUNC_ALWAYS:
 860       k0 = k1 = k2 = k3 = 1;
 861       break;
 862    case PIPE_FUNC_NEVER:
 863       k0 = k1 = k2 = k3 = 0;
 864       break;
 865    default:
 866       k0 = k1 = k2 = k3 = 0;
 867       assert(0);
 868       break;
 869    }
 870
 871    /* convert four pass/fail values to an intensity in [0,1] */
 872    val = 0.25F * (k0 + k1 + k2 + k3);
 873
 874    /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
 875    for (j = 0; j < 4; j++) {
 876       rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
 877       rgba[3][j] = 1.0F;
 878    }
 879 }
 880
 881
 882
 883 static void
 884 sp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
 885                                     const float s[QUAD_SIZE],
 886                                     const float t[QUAD_SIZE],
 887                                     const float p[QUAD_SIZE],
 888                                     float lodbias,
 889                                     float rgba[NUM_CHANNELS][QUAD_SIZE])
 890 {
 891    const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
 892    unsigned  j;
 893    unsigned level = samp->level;
 894    unsigned xpot = 1 << (samp->xpot - level);
 895    unsigned ypot = 1 << (samp->ypot - level);
 896    unsigned xmax = (xpot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, xpot) - 1; */
 897    unsigned ymax = (ypot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, ypot) - 1; */
 898
 899    for (j = 0; j < QUAD_SIZE; j++) {
 900       int c;
 901
 902       float u = s[j] * xpot - 0.5F;
 903       float v = t[j] * ypot - 0.5F;
 904
 905       int uflr = util_ifloor(u);
 906       int vflr = util_ifloor(v);
 907
 908       float xw = u - (float)uflr;
 909       float yw = v - (float)vflr;
 910
 911       int x0 = uflr & (xpot - 1);
 912       int y0 = vflr & (ypot - 1);
 913
 914       const float *tx[4];
 915
 916
 917       /* Can we fetch all four at once:
 918        */
 919       if (x0 < xmax && y0 < ymax)
 920       {
 921          get_texel_quad_2d(tgsi_sampler, 0, level, x0, y0, tx);
 922       }
 923       else
 924       {
 925          unsigned x1 = (x0 + 1) & (xpot - 1);
 926          unsigned y1 = (y0 + 1) & (ypot - 1);
 927          get_texel_quad_2d_mt(tgsi_sampler, 0, level,
 928                               x0, y0, x1, y1, tx);
 929       }
 930
 931
 932       /* interpolate R, G, B, A */
 933       for (c = 0; c < 4; c++) {
 934          rgba[c][j] = lerp_2d(xw, yw,
 935                               tx[0][c], tx[1][c],
 936                               tx[2][c], tx[3][c]);
 937       }
 938    }
 939 }
 940
 941
 942 static void
 943 sp_get_samples_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
 944                                      const float s[QUAD_SIZE],
 945                                      const float t[QUAD_SIZE],
 946                                      const float p[QUAD_SIZE],
 947                                      float lodbias,
 948                                      float rgba[NUM_CHANNELS][QUAD_SIZE])
 949 {
 950    const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
 951    unsigned  j;
 952    unsigned level = samp->level;
 953    unsigned xpot = 1 << (samp->xpot - level);
 954    unsigned ypot = 1 << (samp->ypot - level);
 955
 956    for (j = 0; j < QUAD_SIZE; j++) {
 957       int c;
 958
 959       float u = s[j] * xpot;
 960       float v = t[j] * ypot;
 961
 962       int uflr = util_ifloor(u);
 963       int vflr = util_ifloor(v);
 964
 965       int x0 = uflr & (xpot - 1);
 966       int y0 = vflr & (ypot - 1);
 967
 968       const float *out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0);
 969
 970       for (c = 0; c < 4; c++) {
 971          rgba[c][j] = out[c];
 972       }
 973    }
 974 }
 975
 976
 977 static void
 978 sp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
 979                                      const float s[QUAD_SIZE],
 980                                      const float t[QUAD_SIZE],
 981                                      const float p[QUAD_SIZE],
 982                                      float lodbias,
 983                                      float rgba[NUM_CHANNELS][QUAD_SIZE])
 984 {
 985    const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
 986    unsigned  j;
 987    unsigned level = samp->level;
 988    unsigned xpot = 1 << (samp->xpot - level);
 989    unsigned ypot = 1 << (samp->ypot - level);
 990
 991    for (j = 0; j < QUAD_SIZE; j++) {
 992       int c;
 993
 994       float u = s[j] * xpot;
 995       float v = t[j] * ypot;
 996
 997       int x0, y0;
 998       const float *out;
 999
1000       x0 = util_ifloor(u);
1001       if (x0 < 0)
1002          x0 = 0;
1003       else if (x0 > xpot - 1)
1004          x0 = xpot - 1;
1005
1006       y0 = util_ifloor(v);
1007       if (y0 < 0)
1008          y0 = 0;
1009       else if (y0 > ypot - 1)
1010          y0 = ypot - 1;
1011
1012       out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0);
1013
1014       for (c = 0; c < 4; c++) {
1015          rgba[c][j] = out[c];
1016       }
1017    }
1018 }
1019
1020
1021 static void
1022 sp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
1023                                                const float s[QUAD_SIZE],
1024                                                const float t[QUAD_SIZE],
1025                                                const float p[QUAD_SIZE],
1026                                                float lodbias,
1027                                                float rgba[NUM_CHANNELS][QUAD_SIZE])
1028 {
1029    struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
1030    const struct pipe_texture *texture = samp->texture;
1031    const struct pipe_sampler_state *sampler = samp->sampler;
1032    int level0;
1033    float lambda;
1034
1035    lambda = compute_lambda(texture, sampler, s, t, p, lodbias);
1036    level0 = (int)lambda;
1037
1038    if (lambda < 0.0) {
1039       samp->level = 0;
1040       sp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
1041                                            s, t, p, 0, rgba );
1042    }
1043    else if (level0 >= texture->last_level) {
1044       samp->level = texture->last_level;
1045       sp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
1046                                            s, t, p, 0, rgba );
1047    }
1048    else {
1049       float levelBlend = lambda - level0;
1050       float rgba0[4][4];
1051       float rgba1[4][4];
1052       int c,j;
1053
1054       samp->level = level0;
1055       sp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
1056                                            s, t, p, 0, rgba0 );
1057
1058       samp->level = level0+1;
1059       sp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
1060                                            s, t, p, 0, rgba1 );
1061
1062       for (j = 0; j < QUAD_SIZE; j++) {
1063          for (c = 0; c < 4; c++) {
1064             rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
1065          }
1066       }
1067    }
1068 }
1069
1070 /**
1071  * Common code for sampling 1D/2D/cube textures.
1072  * Could probably extend for 3D...
1073  */
1074 static void
1075 sp_get_samples_2d_common(const struct tgsi_sampler *tgsi_sampler,
1076                          const float s[QUAD_SIZE],
1077                          const float t[QUAD_SIZE],
1078                          const float p[QUAD_SIZE],
1079                          boolean computeLambda,
1080                          float lodbias,
1081                          float rgba[NUM_CHANNELS][QUAD_SIZE],
1082                          const unsigned faces[4])
1083 {
1084    const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
1085    const struct pipe_texture *texture = samp->texture;
1086    const struct pipe_sampler_state *sampler = samp->sampler;
1087    unsigned level0, level1, j, imgFilter;
1088    int width, height;
1089    float levelBlend;
1090
1091    choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias,
1092                         &level0, &level1, &levelBlend, &imgFilter);
1093
1094    assert(sampler->normalized_coords);
1095
1096    width = texture->width[level0];
1097    height = texture->height[level0];
1098
1099    assert(width > 0);
1100
1101    switch (imgFilter) {
1102    case PIPE_TEX_FILTER_NEAREST:
1103       {
1104          int x[4], y[4];
1105          nearest_texcoord_4(sampler->wrap_s, s, width, x);
1106          nearest_texcoord_4(sampler->wrap_t, t, height, y);
1107
1108          for (j = 0; j < QUAD_SIZE; j++) {
1109             get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j);
1110             if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
1111                shadow_compare(sampler, rgba, p, j);
1112             }
1113
1114             if (level0 != level1) {
1115                /* get texels from second mipmap level and blend */
1116                float rgba2[4][4];
1117                unsigned c;
1118                x[j] /= 2;
1119                y[j] /= 2;
1120                get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0,
1121                          rgba2, j);
1122                if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
1123                   shadow_compare(sampler, rgba2, p, j);
1124                }
1125
1126                for (c = 0; c < NUM_CHANNELS; c++) {
1127                   rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
1128                }
1129             }
1130          }
1131       }
1132       break;
1133    case PIPE_TEX_FILTER_LINEAR:
1134    case PIPE_TEX_FILTER_ANISO:
1135       {
1136          int x0[4], y0[4], x1[4], y1[4];
1137          float xw[4], yw[4]; /* weights */
1138
1139          linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw);
1140          linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
1141
1142          for (j = 0; j < QUAD_SIZE; j++) {
1143             float tx[4][4]; /* texels */
1144             int c;
1145             get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0);
1146             get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1);
1147             get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2);
1148             get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3);
1149             if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
1150                shadow_compare4(sampler, tx, p);
1151             }
1152
1153             /* interpolate R, G, B, A */
1154             for (c = 0; c < 4; c++) {
1155                rgba[c][j] = lerp_2d(xw[j], yw[j],
1156                                     tx[c][0], tx[c][1],
1157                                     tx[c][2], tx[c][3]);
1158             }
1159
1160             if (level0 != level1) {
1161                /* get texels from second mipmap level and blend */
1162                float rgba2[4][4];
1163
1164                /* XXX: This is incorrect -- will often end up with (x0
1165                 *  == x1 && y0 == y1), meaning that we fetch the same
1166                 *  texel four times and linearly interpolate between
1167                 *  identical values.  The correct approach would be to
1168                 *  call linear_texcoord again for the second level.
1169                 */
1170                x0[j] /= 2;
1171                y0[j] /= 2;
1172                x1[j] /= 2;
1173                y1[j] /= 2;
1174                get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0);
1175                get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1);
1176                get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2);
1177                get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3);
1178                if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
1179                   shadow_compare4(sampler, tx, p);
1180                }
1181
1182                /* interpolate R, G, B, A */
1183                for (c = 0; c < 4; c++) {
1184                   rgba2[c][j] = lerp_2d(xw[j], yw[j],
1185                                         tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
1186                }
1187
1188                for (c = 0; c < NUM_CHANNELS; c++) {
1189                   rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
1190                }
1191             }
1192          }
1193       }
1194       break;
1195    default:
1196       assert(0);
1197    }
1198 }
1199
1200
1201 static INLINE void
1202 sp_get_samples_1d(const struct tgsi_sampler *sampler,
1203                   const float s[QUAD_SIZE],
1204                   const float t[QUAD_SIZE],
1205                   const float p[QUAD_SIZE],
1206                   boolean computeLambda,
1207                   float lodbias,
1208                   float rgba[NUM_CHANNELS][QUAD_SIZE])
1209 {
1210    static const unsigned faces[4] = {0, 0, 0, 0};
1211    static const float tzero[4] = {0, 0, 0, 0};
1212    sp_get_samples_2d_common(sampler, s, tzero, NULL,
1213                             computeLambda, lodbias, rgba, faces);
1214 }
1215
1216
1217 static INLINE void
1218 sp_get_samples_2d(const struct tgsi_sampler *sampler,
1219                   const float s[QUAD_SIZE],
1220                   const float t[QUAD_SIZE],
1221                   const float p[QUAD_SIZE],
1222                   boolean computeLambda,
1223                   float lodbias,
1224                   float rgba[NUM_CHANNELS][QUAD_SIZE])
1225 {
1226    static const unsigned faces[4] = {0, 0, 0, 0};
1227    sp_get_samples_2d_common(sampler, s, t, p,
1228                             computeLambda, lodbias, rgba, faces);
1229 }
1230
1231
1232 static INLINE void
1233 sp_get_samples_3d(const struct tgsi_sampler *tgsi_sampler,
1234                   const float s[QUAD_SIZE],
1235                   const float t[QUAD_SIZE],
1236                   const float p[QUAD_SIZE],
1237                   boolean computeLambda,
1238                   float lodbias,
1239                   float rgba[NUM_CHANNELS][QUAD_SIZE])
1240 {
1241    const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
1242    const struct pipe_texture *texture = samp->texture;
1243    const struct pipe_sampler_state *sampler = samp->sampler;
1244    /* get/map pipe_surfaces corresponding to 3D tex slices */
1245    unsigned level0, level1, j, imgFilter;
1246    int width, height, depth;
1247    float levelBlend;
1248    const uint face = 0;
1249
1250    choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias,
1251                         &level0, &level1, &levelBlend, &imgFilter);
1252
1253    assert(sampler->normalized_coords);
1254
1255    width = texture->width[level0];
1256    height = texture->height[level0];
1257    depth = texture->depth[level0];
1258
1259    assert(width > 0);
1260    assert(height > 0);
1261    assert(depth > 0);
1262
1263    switch (imgFilter) {
1264    case PIPE_TEX_FILTER_NEAREST:
1265       {
1266          int x[4], y[4], z[4];
1267          nearest_texcoord_4(sampler->wrap_s, s, width, x);
1268          nearest_texcoord_4(sampler->wrap_t, t, height, y);
1269          nearest_texcoord_4(sampler->wrap_r, p, depth, z);
1270          for (j = 0; j < QUAD_SIZE; j++) {
1271             get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j);
1272             if (level0 != level1) {
1273                /* get texels from second mipmap level and blend */
1274                float rgba2[4][4];
1275                unsigned c;
1276                x[j] /= 2;
1277                y[j] /= 2;
1278                z[j] /= 2;
1279                get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j);
1280                for (c = 0; c < NUM_CHANNELS; c++) {
1281                   rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]);
1282                }
1283             }
1284          }
1285       }
1286       break;
1287    case PIPE_TEX_FILTER_LINEAR:
1288    case PIPE_TEX_FILTER_ANISO:
1289       {
1290          int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
1291          float xw[4], yw[4], zw[4]; /* interpolation weights */
1292          linear_texcoord_4(sampler->wrap_s, s, width,  x0, x1, xw);
1293          linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
1294          linear_texcoord_4(sampler->wrap_r, p, depth,  z0, z1, zw);
1295
1296          for (j = 0; j < QUAD_SIZE; j++) {
1297             int c;
1298             float tx0[4][4], tx1[4][4];
1299             get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0);
1300             get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1);
1301             get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2);
1302             get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3);
1303             get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0);
1304             get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1);
1305             get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2);
1306             get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3);
1307
1308             /* interpolate R, G, B, A */
1309             for (c = 0; c < 4; c++) {
1310                rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j],
1311                                     tx0[c][0], tx0[c][1],
1312                                     tx0[c][2], tx0[c][3],
1313                                     tx1[c][0], tx1[c][1],
1314                                     tx1[c][2], tx1[c][3]);
1315             }
1316
1317             if (level0 != level1) {
1318                /* get texels from second mipmap level and blend */
1319                float rgba2[4][4];
1320                x0[j] /= 2;
1321                y0[j] /= 2;
1322                z0[j] /= 2;
1323                x1[j] /= 2;
1324                y1[j] /= 2;
1325                z1[j] /= 2;
1326                get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0);
1327                get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1);
1328                get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2);
1329                get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3);
1330                get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0);
1331                get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1);
1332                get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2);
1333                get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3);
1334
1335                /* interpolate R, G, B, A */
1336                for (c = 0; c < 4; c++) {
1337                   rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j],
1338                                         tx0[c][0], tx0[c][1],
1339                                         tx0[c][2], tx0[c][3],
1340                                         tx1[c][0], tx1[c][1],
1341                                         tx1[c][2], tx1[c][3]);
1342                }
1343
1344                /* blend mipmap levels */
1345                for (c = 0; c < NUM_CHANNELS; c++) {
1346                   rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
1347                }
1348             }
1349          }
1350       }
1351       break;
1352    default:
1353       assert(0);
1354    }
1355 }
1356
1357
1358 static void
1359 sp_get_samples_cube(const struct tgsi_sampler *sampler,
1360                     const float s[QUAD_SIZE],
1361                     const float t[QUAD_SIZE],
1362                     const float p[QUAD_SIZE],
1363                     boolean computeLambda,
1364                     float lodbias,
1365                     float rgba[NUM_CHANNELS][QUAD_SIZE])
1366 {
1367    unsigned faces[QUAD_SIZE], j;
1368    float ssss[4], tttt[4];
1369    for (j = 0; j < QUAD_SIZE; j++) {
1370       faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j);
1371    }
1372    sp_get_samples_2d_common(sampler, ssss, tttt, NULL,
1373                             computeLambda, lodbias, rgba, faces);
1374 }
1375
1376
1377 static void
1378 sp_get_samples_rect(const struct tgsi_sampler *tgsi_sampler,
1379                     const float s[QUAD_SIZE],
1380                     const float t[QUAD_SIZE],
1381                     const float p[QUAD_SIZE],
1382                     boolean computeLambda,
1383                     float lodbias,
1384                     float rgba[NUM_CHANNELS][QUAD_SIZE])
1385 {
1386    const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
1387    const struct pipe_texture *texture = samp->texture;
1388    const struct pipe_sampler_state *sampler = samp->sampler;
1389    const uint face = 0;
1390    unsigned level0, level1, j, imgFilter;
1391    int width, height;
1392    float levelBlend;
1393
1394    choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias,
1395                         &level0, &level1, &levelBlend, &imgFilter);
1396
1397    /* texture RECTS cannot be mipmapped */
1398    assert(level0 == level1);
1399
1400    width = texture->width[level0];
1401    height = texture->height[level0];
1402
1403    assert(width > 0);
1404
1405    switch (imgFilter) {
1406    case PIPE_TEX_FILTER_NEAREST:
1407       {
1408          int x[4], y[4];
1409          nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x);
1410          nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y);
1411          for (j = 0; j < QUAD_SIZE; j++) {
1412             get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j);
1413             if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
1414                shadow_compare(sampler, rgba, p, j);
1415             }
1416          }
1417       }
1418       break;
1419    case PIPE_TEX_FILTER_LINEAR:
1420    case PIPE_TEX_FILTER_ANISO:
1421       {
1422          int x0[4], y0[4], x1[4], y1[4];
1423          float xw[4], yw[4]; /* weights */
1424          linear_texcoord_unnorm_4(sampler->wrap_s, s, width,  x0, x1, xw);
1425          linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw);
1426          for (j = 0; j < QUAD_SIZE; j++) {
1427             float tx[4][4]; /* texels */
1428             int c;
1429             get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0);
1430             get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1);
1431             get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2);
1432             get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3);
1433             if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
1434                shadow_compare4(sampler, tx, p);
1435             }
1436             for (c = 0; c < 4; c++) {
1437                rgba[c][j] = lerp_2d(xw[j], yw[j],
1438                                     tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
1439             }
1440          }
1441       }
1442       break;
1443    default:
1444       assert(0);
1445    }
1446 }
1447
1448
1449 /**
1450  * Common code for vertex/fragment program texture sampling.
1451  */
1452 static INLINE void
1453 sp_get_samples(struct tgsi_sampler *tgsi_sampler,
1454                const float s[QUAD_SIZE],
1455                const float t[QUAD_SIZE],
1456                const float p[QUAD_SIZE],
1457                boolean computeLambda,
1458                float lodbias,
1459                float rgba[NUM_CHANNELS][QUAD_SIZE])
1460 {
1461    const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
1462    const struct pipe_texture *texture = samp->texture;
1463    const struct pipe_sampler_state *sampler = samp->sampler;
1464
1465    if (!texture)
1466       return;
1467
1468    switch (texture->target) {
1469    case PIPE_TEXTURE_1D:
1470       assert(sampler->normalized_coords);
1471       sp_get_samples_1d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
1472       break;
1473    case PIPE_TEXTURE_2D:
1474       if (sampler->normalized_coords)
1475          sp_get_samples_2d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
1476       else
1477          sp_get_samples_rect(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
1478       break;
1479    case PIPE_TEXTURE_3D:
1480       assert(sampler->normalized_coords);
1481       sp_get_samples_3d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
1482       break;
1483    case PIPE_TEXTURE_CUBE:
1484       assert(sampler->normalized_coords);
1485       sp_get_samples_cube(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
1486       break;
1487    default:
1488       assert(0);
1489    }
1490
1491 #if 0 /* DEBUG */
1492    {
1493       int i;
1494       printf("Sampled at %f, %f, %f:\n", s[0], t[0], p[0]);
1495       for (i = 0; i < 4; i++) {
1496          printf("Frag %d: %f %f %f %f\n", i,
1497                 rgba[0][i],
1498                 rgba[1][i],
1499                 rgba[2][i],
1500                 rgba[3][i]);
1501       }
1502    }
1503 #endif
1504 }
1505
1506 static void
1507 sp_get_samples_fallback(struct tgsi_sampler *tgsi_sampler,
1508                         const float s[QUAD_SIZE],
1509                         const float t[QUAD_SIZE],
1510                         const float p[QUAD_SIZE],
1511                         float lodbias,
1512                         float rgba[NUM_CHANNELS][QUAD_SIZE])
1513 {
1514    sp_get_samples(tgsi_sampler, s, t, p, TRUE, lodbias, rgba);
1515 }
1516
1517 /**
1518  * Called via tgsi_sampler::get_samples() when running a fragment shader.
1519  * Get four filtered RGBA values from the sampler's texture.
1520  */
1521 void
1522 sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler,
1523                         const float s[QUAD_SIZE],
1524                         const float t[QUAD_SIZE],
1525                         const float p[QUAD_SIZE],
1526                         float lodbias,
1527                         float rgba[NUM_CHANNELS][QUAD_SIZE])
1528 {
1529    struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
1530    const struct pipe_texture *texture = samp->texture;
1531    const struct pipe_sampler_state *sampler = samp->sampler;
1532
1533    tgsi_sampler->get_samples = sp_get_samples_fallback;
1534
1535    /* Try to hook in a faster sampler.  Ultimately we'll have to
1536     * code-generate these.  Luckily most of this looks like it is
1537     * orthogonal state within the sampler.
1538     */
1539    if (texture->target == PIPE_TEXTURE_2D &&
1540        sampler->min_img_filter == sampler->mag_img_filter &&
1541        sampler->wrap_s == sampler->wrap_t &&
1542        sampler->compare_mode == FALSE &&
1543        sampler->normalized_coords)
1544    {
1545       samp->xpot = util_unsigned_logbase2( samp->texture->width[0] );
1546       samp->ypot = util_unsigned_logbase2( samp->texture->height[0] );
1547
1548       if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
1549          samp->level = CLAMP((int) sampler->min_lod,
1550                              0, (int) texture->last_level);
1551
1552          if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) {
1553             switch (sampler->min_img_filter) {
1554             case PIPE_TEX_FILTER_NEAREST:
1555                tgsi_sampler->get_samples = sp_get_samples_2d_nearest_repeat_POT;
1556                break;
1557             case PIPE_TEX_FILTER_LINEAR:
1558                tgsi_sampler->get_samples = sp_get_samples_2d_linear_repeat_POT;
1559                break;
1560             default:
1561                break;
1562             }
1563          }
1564          else if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP) {
1565             switch (sampler->min_img_filter) {
1566             case PIPE_TEX_FILTER_NEAREST:
1567                tgsi_sampler->get_samples = sp_get_samples_2d_nearest_clamp_POT;
1568                break;
1569             default:
1570                break;
1571             }
1572          }
1573       }
1574       else if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1575          if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) {
1576             switch (sampler->min_img_filter) {
1577             case PIPE_TEX_FILTER_LINEAR:
1578                tgsi_sampler->get_samples = sp_get_samples_2d_linear_mip_linear_repeat_POT;
1579                break;
1580             default:
1581                break;
1582             }
1583          }
1584       }
1585    }
1586    else if (0) {
1587       _debug_printf("target %d/%d min_mip %d/%d min_img %d/%d wrap %d/%d compare %d/%d norm %d/%d\n",
1588                     texture->target, PIPE_TEXTURE_2D,
1589                     sampler->min_mip_filter, PIPE_TEX_MIPFILTER_NONE,
1590                     sampler->min_img_filter, sampler->mag_img_filter,
1591                     sampler->wrap_s, sampler->wrap_t,
1592                     sampler->compare_mode, FALSE,
1593                     sampler->normalized_coords, TRUE);
1594    }
1595
1596    tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba );
1597 }
1598
1599
1600 /**
1601  * Called via tgsi_sampler::get_samples() when running a vertex shader.
1602  * Get four filtered RGBA values from the sampler's texture.
1603  */
1604 void
1605 sp_get_samples_vertex(struct tgsi_sampler *tgsi_sampler,
1606                       const float s[QUAD_SIZE],
1607                       const float t[QUAD_SIZE],
1608                       const float p[QUAD_SIZE],
1609                       float lodbias,
1610                       float rgba[NUM_CHANNELS][QUAD_SIZE])
1611 {
1612    sp_get_samples(tgsi_sampler, s, t, p, FALSE, lodbias, rgba);
1613 }