src/gallium/drivers/softpipe/sp_tex_sample.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  * Copyright 2008 VMware, Inc.  All rights reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the
   9  * "Software"), to deal in the Software without restriction, including
  10  * without limitation the rights to use, copy, modify, merge, publish,
  11  * distribute, sub license, and/or sell copies of the Software, and to
  12  * permit persons to whom the Software is furnished to do so, subject to
  13  * the following conditions:
  14  *
  15  * The above copyright notice and this permission notice (including the
  16  * next paragraph) shall be included in all copies or substantial portions
  17  * of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26  *
  27  **************************************************************************/
  28
  29 /**
  30  * Texture sampling
  31  *
  32  * Authors:
  33  *   Brian Paul
  34  *   Keith Whitwell
  35  */
  36
  37 #include "pipe/p_context.h"
  38 #include "pipe/p_defines.h"
  39 #include "pipe/p_shader_tokens.h"
  40 #include "util/u_math.h"
  41 #include "util/u_memory.h"
  42 #include "sp_quad.h"   /* only for #define QUAD_* tokens */
  43 #include "sp_tex_sample.h"
  44 #include "sp_tex_tile_cache.h"
  45
  46
  47
  48 /*
  49  * Note, the FRAC macro has to work perfectly.  Otherwise you'll sometimes
  50  * see 1-pixel bands of improperly weighted linear-filtered textures.
  51  * The tests/texwrap.c demo is a good test.
  52  * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0.
  53  * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x).
  54  */
  55 #define FRAC(f)  ((f) - util_ifloor(f))
  56
  57
  58 /**
  59  * Linear interpolation macro
  60  */
  61 static INLINE float
  62 lerp(float a, float v0, float v1)
  63 {
  64    return v0 + a * (v1 - v0);
  65 }
  66
  67
  68 /**
  69  * Do 2D/biliner interpolation of float values.
  70  * v00, v10, v01 and v11 are typically four texture samples in a square/box.
  71  * a and b are the horizontal and vertical interpolants.
  72  * It's important that this function is inlined when compiled with
  73  * optimization!  If we find that's not true on some systems, convert
  74  * to a macro.
  75  */
  76 static INLINE float
  77 lerp_2d(float a, float b,
  78         float v00, float v10, float v01, float v11)
  79 {
  80    const float temp0 = lerp(a, v00, v10);
  81    const float temp1 = lerp(a, v01, v11);
  82    return lerp(b, temp0, temp1);
  83 }
  84
  85
  86 /**
  87  * As above, but 3D interpolation of 8 values.
  88  */
  89 static INLINE float
  90 lerp_3d(float a, float b, float c,
  91         float v000, float v100, float v010, float v110,
  92         float v001, float v101, float v011, float v111)
  93 {
  94    const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
  95    const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
  96    return lerp(c, temp0, temp1);
  97 }
  98
  99
 100
 101 /**
 102  * If A is a signed integer, A % B doesn't give the right value for A < 0
 103  * (in terms of texture repeat).  Just casting to unsigned fixes that.
 104  */
 105 #define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B))
 106
 107
 108 /**
 109  * Apply texture coord wrapping mode and return integer texture indexes
 110  * for a vector of four texcoords (S or T or P).
 111  * \param wrapMode  PIPE_TEX_WRAP_x
 112  * \param s  the incoming texcoords
 113  * \param size  the texture image size
 114  * \param icoord  returns the integer texcoords
 115  * \return  integer texture index
 116  */
 117 static void
 118 wrap_nearest_repeat(const float s[4], unsigned size, int icoord[4])
 119 {
 120    uint ch;
 121    /* s limited to [0,1) */
 122    /* i limited to [0,size-1] */
 123    for (ch = 0; ch < 4; ch++) {
 124       int i = util_ifloor(s[ch] * size);
 125       icoord[ch] = REMAINDER(i, size);
 126    }
 127 }
 128
 129
 130 static void
 131 wrap_nearest_clamp(const float s[4], unsigned size, int icoord[4])
 132 {
 133    uint ch;
 134    /* s limited to [0,1] */
 135    /* i limited to [0,size-1] */
 136    for (ch = 0; ch < 4; ch++) {
 137       if (s[ch] <= 0.0F)
 138          icoord[ch] = 0;
 139       else if (s[ch] >= 1.0F)
 140          icoord[ch] = size - 1;
 141       else
 142          icoord[ch] = util_ifloor(s[ch] * size);
 143    }
 144 }
 145
 146
 147 static void
 148 wrap_nearest_clamp_to_edge(const float s[4], unsigned size, int icoord[4])
 149 {
 150    uint ch;
 151    /* s limited to [min,max] */
 152    /* i limited to [0, size-1] */
 153    const float min = 1.0F / (2.0F * size);
 154    const float max = 1.0F - min;
 155    for (ch = 0; ch < 4; ch++) {
 156       if (s[ch] < min)
 157          icoord[ch] = 0;
 158       else if (s[ch] > max)
 159          icoord[ch] = size - 1;
 160       else
 161          icoord[ch] = util_ifloor(s[ch] * size);
 162    }
 163 }
 164
 165
 166 static void
 167 wrap_nearest_clamp_to_border(const float s[4], unsigned size, int icoord[4])
 168 {
 169    uint ch;
 170    /* s limited to [min,max] */
 171    /* i limited to [-1, size] */
 172    const float min = -1.0F / (2.0F * size);
 173    const float max = 1.0F - min;
 174    for (ch = 0; ch < 4; ch++) {
 175       if (s[ch] <= min)
 176          icoord[ch] = -1;
 177       else if (s[ch] >= max)
 178          icoord[ch] = size;
 179       else
 180          icoord[ch] = util_ifloor(s[ch] * size);
 181    }
 182 }
 183
 184
 185 static void
 186 wrap_nearest_mirror_repeat(const float s[4], unsigned size, int icoord[4])
 187 {
 188    uint ch;
 189    const float min = 1.0F / (2.0F * size);
 190    const float max = 1.0F - min;
 191    for (ch = 0; ch < 4; ch++) {
 192       const int flr = util_ifloor(s[ch]);
 193       float u;
 194       if (flr & 1)
 195          u = 1.0F - (s[ch] - (float) flr);
 196       else
 197          u = s[ch] - (float) flr;
 198       if (u < min)
 199          icoord[ch] = 0;
 200       else if (u > max)
 201          icoord[ch] = size - 1;
 202       else
 203          icoord[ch] = util_ifloor(u * size);
 204    }
 205 }
 206
 207
 208 static void
 209 wrap_nearest_mirror_clamp(const float s[4], unsigned size, int icoord[4])
 210 {
 211    uint ch;
 212    for (ch = 0; ch < 4; ch++) {
 213       /* s limited to [0,1] */
 214       /* i limited to [0,size-1] */
 215       const float u = fabsf(s[ch]);
 216       if (u <= 0.0F)
 217          icoord[ch] = 0;
 218       else if (u >= 1.0F)
 219          icoord[ch] = size - 1;
 220       else
 221          icoord[ch] = util_ifloor(u * size);
 222    }
 223 }
 224
 225
 226 static void
 227 wrap_nearest_mirror_clamp_to_edge(const float s[4], unsigned size,
 228                                   int icoord[4])
 229 {
 230    uint ch;
 231    /* s limited to [min,max] */
 232    /* i limited to [0, size-1] */
 233    const float min = 1.0F / (2.0F * size);
 234    const float max = 1.0F - min;
 235    for (ch = 0; ch < 4; ch++) {
 236       const float u = fabsf(s[ch]);
 237       if (u < min)
 238          icoord[ch] = 0;
 239       else if (u > max)
 240          icoord[ch] = size - 1;
 241       else
 242          icoord[ch] = util_ifloor(u * size);
 243    }
 244 }
 245
 246
 247 static void
 248 wrap_nearest_mirror_clamp_to_border(const float s[4], unsigned size,
 249                                     int icoord[4])
 250 {
 251    uint ch;
 252    /* s limited to [min,max] */
 253    /* i limited to [0, size-1] */
 254    const float min = -1.0F / (2.0F * size);
 255    const float max = 1.0F - min;
 256    for (ch = 0; ch < 4; ch++) {
 257       const float u = fabsf(s[ch]);
 258       if (u < min)
 259          icoord[ch] = -1;
 260       else if (u > max)
 261          icoord[ch] = size;
 262       else
 263          icoord[ch] = util_ifloor(u * size);
 264    }
 265 }
 266
 267
 268 /**
 269  * Used to compute texel locations for linear sampling for four texcoords.
 270  * \param wrapMode  PIPE_TEX_WRAP_x
 271  * \param s  the texcoords
 272  * \param size  the texture image size
 273  * \param icoord0  returns first texture indexes
 274  * \param icoord1  returns second texture indexes (usually icoord0 + 1)
 275  * \param w  returns blend factor/weight between texture indexes
 276  * \param icoord  returns the computed integer texture coords
 277  */
 278 static void
 279 wrap_linear_repeat(const float s[4], unsigned size,
 280                    int icoord0[4], int icoord1[4], float w[4])
 281 {
 282    uint ch;
 283    for (ch = 0; ch < 4; ch++) {
 284       float u = s[ch] * size - 0.5F;
 285       icoord0[ch] = REMAINDER(util_ifloor(u), size);
 286       icoord1[ch] = REMAINDER(icoord0[ch] + 1, size);
 287       w[ch] = FRAC(u);
 288    }
 289 }
 290
 291
 292 static void
 293 wrap_linear_clamp(const float s[4], unsigned size,
 294                   int icoord0[4], int icoord1[4], float w[4])
 295 {
 296    uint ch;
 297    for (ch = 0; ch < 4; ch++) {
 298       float u = CLAMP(s[ch], 0.0F, 1.0F);
 299       u = u * size - 0.5f;
 300       icoord0[ch] = util_ifloor(u);
 301       icoord1[ch] = icoord0[ch] + 1;
 302       w[ch] = FRAC(u);
 303    }
 304 }
 305
 306
 307 static void
 308 wrap_linear_clamp_to_edge(const float s[4], unsigned size,
 309                           int icoord0[4], int icoord1[4], float w[4])
 310 {
 311    uint ch;
 312    for (ch = 0; ch < 4; ch++) {
 313       float u = CLAMP(s[ch], 0.0F, 1.0F);
 314       u = u * size - 0.5f;
 315       icoord0[ch] = util_ifloor(u);
 316       icoord1[ch] = icoord0[ch] + 1;
 317       if (icoord0[ch] < 0)
 318          icoord0[ch] = 0;
 319       if (icoord1[ch] >= (int) size)
 320          icoord1[ch] = size - 1;
 321       w[ch] = FRAC(u);
 322    }
 323 }
 324
 325
 326 static void
 327 wrap_linear_clamp_to_border(const float s[4], unsigned size,
 328                             int icoord0[4], int icoord1[4], float w[4])
 329 {
 330    const float min = -1.0F / (2.0F * size);
 331    const float max = 1.0F - min;
 332    uint ch;
 333    for (ch = 0; ch < 4; ch++) {
 334       float u = CLAMP(s[ch], min, max);
 335       u = u * size - 0.5f;
 336       icoord0[ch] = util_ifloor(u);
 337       icoord1[ch] = icoord0[ch] + 1;
 338       w[ch] = FRAC(u);
 339    }
 340 }
 341
 342
 343 static void
 344 wrap_linear_mirror_repeat(const float s[4], unsigned size,
 345                           int icoord0[4], int icoord1[4], float w[4])
 346 {
 347    uint ch;
 348    for (ch = 0; ch < 4; ch++) {
 349       const int flr = util_ifloor(s[ch]);
 350       float u;
 351       if (flr & 1)
 352          u = 1.0F - (s[ch] - (float) flr);
 353       else
 354          u = s[ch] - (float) flr;
 355       u = u * size - 0.5F;
 356       icoord0[ch] = util_ifloor(u);
 357       icoord1[ch] = icoord0[ch] + 1;
 358       if (icoord0[ch] < 0)
 359          icoord0[ch] = 0;
 360       if (icoord1[ch] >= (int) size)
 361          icoord1[ch] = size - 1;
 362       w[ch] = FRAC(u);
 363    }
 364 }
 365
 366
 367 static void
 368 wrap_linear_mirror_clamp(const float s[4], unsigned size,
 369                          int icoord0[4], int icoord1[4], float w[4])
 370 {
 371    uint ch;
 372    for (ch = 0; ch < 4; ch++) {
 373       float u = fabsf(s[ch]);
 374       if (u >= 1.0F)
 375          u = (float) size;
 376       else
 377          u *= size;
 378       u -= 0.5F;
 379       icoord0[ch] = util_ifloor(u);
 380       icoord1[ch] = icoord0[ch] + 1;
 381       w[ch] = FRAC(u);
 382    }
 383 }
 384
 385
 386 static void
 387 wrap_linear_mirror_clamp_to_edge(const float s[4], unsigned size,
 388                                  int icoord0[4], int icoord1[4], float w[4])
 389 {
 390    uint ch;
 391    for (ch = 0; ch < 4; ch++) {
 392       float u = fabsf(s[ch]);
 393       if (u >= 1.0F)
 394          u = (float) size;
 395       else
 396          u *= size;
 397       u -= 0.5F;
 398       icoord0[ch] = util_ifloor(u);
 399       icoord1[ch] = icoord0[ch] + 1;
 400       if (icoord0[ch] < 0)
 401          icoord0[ch] = 0;
 402       if (icoord1[ch] >= (int) size)
 403          icoord1[ch] = size - 1;
 404       w[ch] = FRAC(u);
 405    }
 406 }
 407
 408
 409 static void
 410 wrap_linear_mirror_clamp_to_border(const float s[4], unsigned size,
 411                                    int icoord0[4], int icoord1[4], float w[4])
 412 {
 413    const float min = -1.0F / (2.0F * size);
 414    const float max = 1.0F - min;
 415    uint ch;
 416    for (ch = 0; ch < 4; ch++) {
 417       float u = fabsf(s[ch]);
 418       if (u <= min)
 419          u = min * size;
 420       else if (u >= max)
 421          u = max * size;
 422       else
 423          u *= size;
 424       u -= 0.5F;
 425       icoord0[ch] = util_ifloor(u);
 426       icoord1[ch] = icoord0[ch] + 1;
 427       w[ch] = FRAC(u);
 428    }
 429 }
 430
 431
 432 /**
 433  * For RECT textures / unnormalized texcoords
 434  * Only a subset of wrap modes supported.
 435  */
 436 static void
 437 wrap_nearest_unorm_clamp(const float s[4], unsigned size, int icoord[4])
 438 {
 439    uint ch;
 440    for (ch = 0; ch < 4; ch++) {
 441       int i = util_ifloor(s[ch]);
 442       icoord[ch]= CLAMP(i, 0, (int) size-1);
 443    }
 444 }
 445
 446
 447 /**
 448  * Handles clamp_to_edge and clamp_to_border:
 449  */
 450 static void
 451 wrap_nearest_unorm_clamp_to_border(const float s[4], unsigned size,
 452                                    int icoord[4])
 453 {
 454    uint ch;
 455    for (ch = 0; ch < 4; ch++) {
 456       icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) );
 457    }
 458 }
 459
 460
 461 /**
 462  * For RECT textures / unnormalized texcoords.
 463  * Only a subset of wrap modes supported.
 464  */
 465 static void
 466 wrap_linear_unorm_clamp(const float s[4], unsigned size,
 467                         int icoord0[4], int icoord1[4], float w[4])
 468 {
 469    uint ch;
 470    for (ch = 0; ch < 4; ch++) {
 471       /* Not exactly what the spec says, but it matches NVIDIA output */
 472       float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f);
 473       icoord0[ch] = util_ifloor(u);
 474       icoord1[ch] = icoord0[ch] + 1;
 475       w[ch] = FRAC(u);
 476    }
 477 }
 478
 479
 480 static void
 481 wrap_linear_unorm_clamp_to_border(const float s[4], unsigned size,
 482                                   int icoord0[4], int icoord1[4], float w[4])
 483 {
 484    uint ch;
 485    for (ch = 0; ch < 4; ch++) {
 486       float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F);
 487       u -= 0.5F;
 488       icoord0[ch] = util_ifloor(u);
 489       icoord1[ch] = icoord0[ch] + 1;
 490       if (icoord1[ch] > (int) size - 1)
 491          icoord1[ch] = size - 1;
 492       w[ch] = FRAC(u);
 493    }
 494 }
 495
 496
 497
 498 /**
 499  * Examine the quad's texture coordinates to compute the partial
 500  * derivatives w.r.t X and Y, then compute lambda (level of detail).
 501  */
 502 static float
 503 compute_lambda_1d(const struct sp_sampler_varient *samp,
 504                   const float s[QUAD_SIZE],
 505                   const float t[QUAD_SIZE],
 506                   const float p[QUAD_SIZE],
 507                   float lodbias)
 508 {
 509    const struct pipe_texture *texture = samp->texture;
 510    const struct pipe_sampler_state *sampler = samp->sampler;
 511    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 512    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 513    float rho = MAX2(dsdx, dsdy) * texture->width[0];
 514    float lambda;
 515
 516    lambda = util_fast_log2(rho);
 517    lambda += lodbias + sampler->lod_bias;
 518    lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
 519
 520    return lambda;
 521 }
 522
 523
 524 static float
 525 compute_lambda_2d(const struct sp_sampler_varient *samp,
 526                   const float s[QUAD_SIZE],
 527                   const float t[QUAD_SIZE],
 528                   const float p[QUAD_SIZE],
 529                   float lodbias)
 530 {
 531    const struct pipe_texture *texture = samp->texture;
 532    const struct pipe_sampler_state *sampler = samp->sampler;
 533    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 534    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 535    float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
 536    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
 537    float maxx = MAX2(dsdx, dsdy) * texture->width[0];
 538    float maxy = MAX2(dtdx, dtdy) * texture->height[0];
 539    float rho  = MAX2(maxx, maxy);
 540    float lambda;
 541
 542    lambda = util_fast_log2(rho);
 543    lambda += lodbias + sampler->lod_bias;
 544    lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
 545
 546    return lambda;
 547 }
 548
 549
 550 static float
 551 compute_lambda_3d(const struct sp_sampler_varient *samp,
 552                   const float s[QUAD_SIZE],
 553                   const float t[QUAD_SIZE],
 554                   const float p[QUAD_SIZE],
 555                   float lodbias)
 556 {
 557    const struct pipe_texture *texture = samp->texture;
 558    const struct pipe_sampler_state *sampler = samp->sampler;
 559    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 560    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 561    float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
 562    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
 563    float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
 564    float dpdy = fabsf(p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT]);
 565    float maxx = MAX2(dsdx, dsdy) * texture->width[0];
 566    float maxy = MAX2(dtdx, dtdy) * texture->height[0];
 567    float maxz = MAX2(dpdx, dpdy) * texture->depth[0];
 568    float rho, lambda;
 569
 570    rho = MAX2(maxx, maxy);
 571    rho = MAX2(rho, maxz);
 572
 573    lambda = util_fast_log2(rho);
 574    lambda += lodbias + sampler->lod_bias;
 575    lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
 576
 577    return lambda;
 578 }
 579
 580
 581 /**
 582  * Compute lambda for a vertex texture sampler.
 583  * Since there aren't derivatives to use, just return the LOD bias.
 584  */
 585 static float
 586 compute_lambda_vert(const struct sp_sampler_varient *samp,
 587                     const float s[QUAD_SIZE],
 588                     const float t[QUAD_SIZE],
 589                     const float p[QUAD_SIZE],
 590                     float lodbias)
 591 {
 592    return lodbias;
 593 }
 594
 595
 596
 597 /**
 598  * Get a texel from a texture, using the texture tile cache.
 599  *
 600  * \param addr  the template tex address containing cube, z, face info.
 601  * \param x  the x coord of texel within 2D image
 602  * \param y  the y coord of texel within 2D image
 603  * \param rgba  the quad to put the texel/color into
 604  *
 605  * XXX maybe move this into sp_tex_tile_cache.c and merge with the
 606  * sp_get_cached_tile_tex() function.  Also, get 4 texels instead of 1...
 607  */
 608
 609
 610
 611
 612 static INLINE const float *
 613 get_texel_2d_no_border(const struct sp_sampler_varient *samp,
 614                        union tex_tile_address addr, int x, int y)
 615 {
 616    const struct softpipe_tex_cached_tile *tile;
 617
 618    addr.bits.x = x / TILE_SIZE;
 619    addr.bits.y = y / TILE_SIZE;
 620    y %= TILE_SIZE;
 621    x %= TILE_SIZE;
 622
 623    tile = sp_get_cached_tile_tex(samp->cache, addr);
 624
 625    return &tile->data.color[y][x][0];
 626 }
 627
 628
 629 static INLINE const float *
 630 get_texel_2d(const struct sp_sampler_varient *samp,
 631              union tex_tile_address addr, int x, int y)
 632 {
 633    const struct pipe_texture *texture = samp->texture;
 634    unsigned level = addr.bits.level;
 635
 636    if (x < 0 || x >= (int) texture->width[level] ||
 637        y < 0 || y >= (int) texture->height[level]) {
 638       return samp->sampler->border_color;
 639    }
 640    else {
 641       return get_texel_2d_no_border( samp, addr, x, y );
 642    }
 643 }
 644
 645
 646 /* Gather a quad of adjacent texels within a tile:
 647  */
 648 static INLINE void
 649 get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_varient *samp,
 650                                         union tex_tile_address addr,
 651                                         unsigned x, unsigned y,
 652                                         const float *out[4])
 653 {
 654    const struct softpipe_tex_cached_tile *tile;
 655
 656    addr.bits.x = x / TILE_SIZE;
 657    addr.bits.y = y / TILE_SIZE;
 658    y %= TILE_SIZE;
 659    x %= TILE_SIZE;
 660
 661    tile = sp_get_cached_tile_tex(samp->cache, addr);
 662
 663    out[0] = &tile->data.color[y  ][x  ][0];
 664    out[1] = &tile->data.color[y  ][x+1][0];
 665    out[2] = &tile->data.color[y+1][x  ][0];
 666    out[3] = &tile->data.color[y+1][x+1][0];
 667 }
 668
 669
 670 /* Gather a quad of potentially non-adjacent texels:
 671  */
 672 static INLINE void
 673 get_texel_quad_2d_no_border(const struct sp_sampler_varient *samp,
 674                             union tex_tile_address addr,
 675                             int x0, int y0,
 676                             int x1, int y1,
 677                             const float *out[4])
 678 {
 679    out[0] = get_texel_2d_no_border( samp, addr, x0, y0 );
 680    out[1] = get_texel_2d_no_border( samp, addr, x1, y0 );
 681    out[2] = get_texel_2d_no_border( samp, addr, x0, y1 );
 682    out[3] = get_texel_2d_no_border( samp, addr, x1, y1 );
 683 }
 684
 685 /* Can involve a lot of unnecessary checks for border color:
 686  */
 687 static INLINE void
 688 get_texel_quad_2d(const struct sp_sampler_varient *samp,
 689                   union tex_tile_address addr,
 690                   int x0, int y0,
 691                   int x1, int y1,
 692                   const float *out[4])
 693 {
 694    out[0] = get_texel_2d( samp, addr, x0, y0 );
 695    out[1] = get_texel_2d( samp, addr, x1, y0 );
 696    out[3] = get_texel_2d( samp, addr, x1, y1 );
 697    out[2] = get_texel_2d( samp, addr, x0, y1 );
 698 }
 699
 700
 701
 702 /* 3d varients:
 703  */
 704 static INLINE const float *
 705 get_texel_3d_no_border(const struct sp_sampler_varient *samp,
 706                        union tex_tile_address addr, int x, int y, int z)
 707 {
 708    const struct softpipe_tex_cached_tile *tile;
 709
 710    addr.bits.x = x / TILE_SIZE;
 711    addr.bits.y = y / TILE_SIZE;
 712    addr.bits.z = z;
 713    y %= TILE_SIZE;
 714    x %= TILE_SIZE;
 715
 716    tile = sp_get_cached_tile_tex(samp->cache, addr);
 717
 718    return &tile->data.color[y][x][0];
 719 }
 720
 721
 722 static INLINE const float *
 723 get_texel_3d(const struct sp_sampler_varient *samp,
 724              union tex_tile_address addr, int x, int y, int z)
 725 {
 726    const struct pipe_texture *texture = samp->texture;
 727    unsigned level = addr.bits.level;
 728
 729    if (x < 0 || x >= (int) texture->width[level] ||
 730        y < 0 || y >= (int) texture->height[level] ||
 731        z < 0 || z >= (int) texture->depth[level]) {
 732       return samp->sampler->border_color;
 733    }
 734    else {
 735       return get_texel_3d_no_border( samp, addr, x, y, z );
 736    }
 737 }
 738
 739
 740 /**
 741  * Given the logbase2 of a mipmap's base level size and a mipmap level,
 742  * return the size (in texels) of that mipmap level.
 743  * For example, if level[0].width = 256 then base_pot will be 8.
 744  * If level = 2, then we'll return 64 (the width at level=2).
 745  * Return 1 if level > base_pot.
 746  */
 747 static INLINE unsigned
 748 pot_level_size(unsigned base_pot, unsigned level)
 749 {
 750    return (base_pot >= level) ? (1 << (base_pot - level)) : 1;
 751 }
 752
 753
 754 /* Some image-filter fastpaths:
 755  */
 756 static INLINE void
 757 img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
 758                                 const float s[QUAD_SIZE],
 759                                 const float t[QUAD_SIZE],
 760                                 const float p[QUAD_SIZE],
 761                                 float lodbias,
 762                                 float rgba[NUM_CHANNELS][QUAD_SIZE])
 763 {
 764    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 765    unsigned  j;
 766    unsigned level = samp->level;
 767    unsigned xpot = pot_level_size(samp->xpot, level);
 768    unsigned ypot = pot_level_size(samp->ypot, level);
 769    unsigned xmax = (xpot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, xpot) - 1; */
 770    unsigned ymax = (ypot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, ypot) - 1; */
 771    union tex_tile_address addr;
 772
 773    addr.value = 0;
 774    addr.bits.level = samp->level;
 775
 776    for (j = 0; j < QUAD_SIZE; j++) {
 777       int c;
 778
 779       float u = s[j] * xpot - 0.5F;
 780       float v = t[j] * ypot - 0.5F;
 781
 782       int uflr = util_ifloor(u);
 783       int vflr = util_ifloor(v);
 784
 785       float xw = u - (float)uflr;
 786       float yw = v - (float)vflr;
 787
 788       int x0 = uflr & (xpot - 1);
 789       int y0 = vflr & (ypot - 1);
 790
 791       const float *tx[4];
 792
 793       /* Can we fetch all four at once:
 794        */
 795       if (x0 < xmax && y0 < ymax) {
 796          get_texel_quad_2d_no_border_single_tile(samp, addr, x0, y0, tx);
 797       }
 798       else {
 799          unsigned x1 = (x0 + 1) & (xpot - 1);
 800          unsigned y1 = (y0 + 1) & (ypot - 1);
 801          get_texel_quad_2d_no_border(samp, addr, x0, y0, x1, y1, tx);
 802       }
 803
 804       /* interpolate R, G, B, A */
 805       for (c = 0; c < 4; c++) {
 806          rgba[c][j] = lerp_2d(xw, yw,
 807                               tx[0][c], tx[1][c],
 808                               tx[2][c], tx[3][c]);
 809       }
 810    }
 811 }
 812
 813
 814 static INLINE void
 815 img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
 816                                  const float s[QUAD_SIZE],
 817                                  const float t[QUAD_SIZE],
 818                                  const float p[QUAD_SIZE],
 819                                  float lodbias,
 820                                  float rgba[NUM_CHANNELS][QUAD_SIZE])
 821 {
 822    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 823    unsigned  j;
 824    unsigned level = samp->level;
 825    unsigned xpot = pot_level_size(samp->xpot, level);
 826    unsigned ypot = pot_level_size(samp->ypot, level);
 827    union tex_tile_address addr;
 828
 829    addr.value = 0;
 830    addr.bits.level = samp->level;
 831
 832    for (j = 0; j < QUAD_SIZE; j++) {
 833       int c;
 834
 835       float u = s[j] * xpot;
 836       float v = t[j] * ypot;
 837
 838       int uflr = util_ifloor(u);
 839       int vflr = util_ifloor(v);
 840
 841       int x0 = uflr & (xpot - 1);
 842       int y0 = vflr & (ypot - 1);
 843
 844       const float *out = get_texel_2d_no_border(samp, addr, x0, y0);
 845
 846       for (c = 0; c < 4; c++) {
 847          rgba[c][j] = out[c];
 848       }
 849    }
 850 }
 851
 852
 853 static INLINE void
 854 img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
 855                                 const float s[QUAD_SIZE],
 856                                 const float t[QUAD_SIZE],
 857                                 const float p[QUAD_SIZE],
 858                                 float lodbias,
 859                                 float rgba[NUM_CHANNELS][QUAD_SIZE])
 860 {
 861    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 862    unsigned  j;
 863    unsigned level = samp->level;
 864    unsigned xpot = pot_level_size(samp->xpot, level);
 865    unsigned ypot = pot_level_size(samp->ypot, level);
 866    union tex_tile_address addr;
 867
 868    addr.value = 0;
 869    addr.bits.level = samp->level;
 870
 871    for (j = 0; j < QUAD_SIZE; j++) {
 872       int c;
 873
 874       float u = s[j] * xpot;
 875       float v = t[j] * ypot;
 876
 877       int x0, y0;
 878       const float *out;
 879
 880       x0 = util_ifloor(u);
 881       if (x0 < 0)
 882          x0 = 0;
 883       else if (x0 > xpot - 1)
 884          x0 = xpot - 1;
 885
 886       y0 = util_ifloor(v);
 887       if (y0 < 0)
 888          y0 = 0;
 889       else if (y0 > ypot - 1)
 890          y0 = ypot - 1;
 891
 892       out = get_texel_2d_no_border(samp, addr, x0, y0);
 893
 894       for (c = 0; c < 4; c++) {
 895          rgba[c][j] = out[c];
 896       }
 897    }
 898 }
 899
 900
 901 static void
 902 img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
 903                         const float s[QUAD_SIZE],
 904                         const float t[QUAD_SIZE],
 905                         const float p[QUAD_SIZE],
 906                         float lodbias,
 907                         float rgba[NUM_CHANNELS][QUAD_SIZE])
 908 {
 909    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 910    const struct pipe_texture *texture = samp->texture;
 911    unsigned level0, j;
 912    int width;
 913    int x[4];
 914    union tex_tile_address addr;
 915
 916    level0 = samp->level;
 917    width = texture->width[level0];
 918
 919    assert(width > 0);
 920
 921    addr.value = 0;
 922    addr.bits.level = samp->level;
 923
 924    samp->nearest_texcoord_s(s, width, x);
 925
 926    for (j = 0; j < QUAD_SIZE; j++) {
 927       const float *out = get_texel_2d(samp, addr, x[j], 0);
 928       int c;
 929       for (c = 0; c < 4; c++) {
 930          rgba[c][j] = out[c];
 931       }
 932    }
 933 }
 934
 935
 936 static void
 937 img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
 938                       const float s[QUAD_SIZE],
 939                       const float t[QUAD_SIZE],
 940                       const float p[QUAD_SIZE],
 941                       float lodbias,
 942                       float rgba[NUM_CHANNELS][QUAD_SIZE])
 943 {
 944    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 945    const struct pipe_texture *texture = samp->texture;
 946    unsigned level0, j;
 947    int width, height;
 948    int x[4], y[4];
 949    union tex_tile_address addr;
 950
 951
 952    level0 = samp->level;
 953    width = texture->width[level0];
 954    height = texture->height[level0];
 955
 956    assert(width > 0);
 957    assert(height > 0);
 958
 959    addr.value = 0;
 960    addr.bits.level = samp->level;
 961
 962    samp->nearest_texcoord_s(s, width, x);
 963    samp->nearest_texcoord_t(t, height, y);
 964
 965    for (j = 0; j < QUAD_SIZE; j++) {
 966       const float *out = get_texel_2d(samp, addr, x[j], y[j]);
 967       int c;
 968       for (c = 0; c < 4; c++) {
 969          rgba[c][j] = out[c];
 970       }
 971    }
 972 }
 973
 974
 975 static inline union tex_tile_address
 976 face(union tex_tile_address addr, unsigned face )
 977 {
 978    addr.bits.face = face;
 979    return addr;
 980 }
 981
 982
 983 static void
 984 img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler,
 985                         const float s[QUAD_SIZE],
 986                         const float t[QUAD_SIZE],
 987                         const float p[QUAD_SIZE],
 988                         float lodbias,
 989                         float rgba[NUM_CHANNELS][QUAD_SIZE])
 990 {
 991    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 992    const struct pipe_texture *texture = samp->texture;
 993    const unsigned *faces = samp->faces; /* zero when not cube-mapping */
 994    unsigned level0, j;
 995    int width, height;
 996    int x[4], y[4];
 997    union tex_tile_address addr;
 998
 999    level0 = samp->level;
1000    width = texture->width[level0];
1001    height = texture->height[level0];
1002
1003    assert(width > 0);
1004    assert(height > 0);
1005
1006    addr.value = 0;
1007    addr.bits.level = samp->level;
1008
1009    samp->nearest_texcoord_s(s, width, x);
1010    samp->nearest_texcoord_t(t, height, y);
1011
1012    for (j = 0; j < QUAD_SIZE; j++) {
1013       const float *out = get_texel_2d(samp, face(addr, faces[j]), x[j], y[j]);
1014       int c;
1015       for (c = 0; c < 4; c++) {
1016          rgba[c][j] = out[c];
1017       }
1018    }
1019 }
1020
1021
1022 static void
1023 img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler,
1024                       const float s[QUAD_SIZE],
1025                       const float t[QUAD_SIZE],
1026                       const float p[QUAD_SIZE],
1027                       float lodbias,
1028                       float rgba[NUM_CHANNELS][QUAD_SIZE])
1029 {
1030    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1031    const struct pipe_texture *texture = samp->texture;
1032    unsigned level0, j;
1033    int width, height, depth;
1034    int x[4], y[4], z[4];
1035    union tex_tile_address addr;
1036
1037    level0 = samp->level;
1038    width = texture->width[level0];
1039    height = texture->height[level0];
1040    depth = texture->depth[level0];
1041
1042    assert(width > 0);
1043    assert(height > 0);
1044    assert(depth > 0);
1045
1046    samp->nearest_texcoord_s(s, width,  x);
1047    samp->nearest_texcoord_t(t, height, y);
1048    samp->nearest_texcoord_p(p, depth,  z);
1049
1050    addr.value = 0;
1051    addr.bits.level = samp->level;
1052
1053    for (j = 0; j < QUAD_SIZE; j++) {
1054       const float *out = get_texel_3d(samp, addr, x[j], y[j], z[j]);
1055       int c;
1056       for (c = 0; c < 4; c++) {
1057          rgba[c][j] = out[c];
1058       }
1059    }
1060 }
1061
1062
1063 static void
1064 img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
1065                      const float s[QUAD_SIZE],
1066                      const float t[QUAD_SIZE],
1067                      const float p[QUAD_SIZE],
1068                      float lodbias,
1069                      float rgba[NUM_CHANNELS][QUAD_SIZE])
1070 {
1071    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1072    const struct pipe_texture *texture = samp->texture;
1073    unsigned level0, j;
1074    int width;
1075    int x0[4], x1[4];
1076    float xw[4]; /* weights */
1077    union tex_tile_address addr;
1078
1079    level0 = samp->level;
1080    width = texture->width[level0];
1081
1082    assert(width > 0);
1083
1084    addr.value = 0;
1085    addr.bits.level = samp->level;
1086
1087    samp->linear_texcoord_s(s, width, x0, x1, xw);
1088
1089    for (j = 0; j < QUAD_SIZE; j++) {
1090       const float *tx0 = get_texel_2d(samp, addr, x0[j], 0);
1091       const float *tx1 = get_texel_2d(samp, addr, x1[j], 0);
1092       int c;
1093
1094       /* interpolate R, G, B, A */
1095       for (c = 0; c < 4; c++) {
1096          rgba[c][j] = lerp(xw[j], tx0[c], tx1[c]);
1097       }
1098    }
1099 }
1100
1101
1102 static void
1103 img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
1104                      const float s[QUAD_SIZE],
1105                      const float t[QUAD_SIZE],
1106                      const float p[QUAD_SIZE],
1107                      float lodbias,
1108                      float rgba[NUM_CHANNELS][QUAD_SIZE])
1109 {
1110    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1111    const struct pipe_texture *texture = samp->texture;
1112    unsigned level0, j;
1113    int width, height;
1114    int x0[4], y0[4], x1[4], y1[4];
1115    float xw[4], yw[4]; /* weights */
1116    union tex_tile_address addr;
1117
1118    level0 = samp->level;
1119    width = texture->width[level0];
1120    height = texture->height[level0];
1121
1122    assert(width > 0);
1123    assert(height > 0);
1124
1125    addr.value = 0;
1126    addr.bits.level = samp->level;
1127
1128    samp->linear_texcoord_s(s, width,  x0, x1, xw);
1129    samp->linear_texcoord_t(t, height, y0, y1, yw);
1130
1131    for (j = 0; j < QUAD_SIZE; j++) {
1132       const float *tx0 = get_texel_2d(samp, addr, x0[j], y0[j]);
1133       const float *tx1 = get_texel_2d(samp, addr, x1[j], y0[j]);
1134       const float *tx2 = get_texel_2d(samp, addr, x0[j], y1[j]);
1135       const float *tx3 = get_texel_2d(samp, addr, x1[j], y1[j]);
1136       int c;
1137
1138       /* interpolate R, G, B, A */
1139       for (c = 0; c < 4; c++) {
1140          rgba[c][j] = lerp_2d(xw[j], yw[j],
1141                               tx0[c], tx1[c],
1142                               tx2[c], tx3[c]);
1143       }
1144    }
1145 }
1146
1147
1148 static void
1149 img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
1150                        const float s[QUAD_SIZE],
1151                        const float t[QUAD_SIZE],
1152                        const float p[QUAD_SIZE],
1153                        float lodbias,
1154                        float rgba[NUM_CHANNELS][QUAD_SIZE])
1155 {
1156    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1157    const struct pipe_texture *texture = samp->texture;
1158    const unsigned *faces = samp->faces; /* zero when not cube-mapping */
1159    unsigned level0, j;
1160    int width, height;
1161    int x0[4], y0[4], x1[4], y1[4];
1162    float xw[4], yw[4]; /* weights */
1163    union tex_tile_address addr;
1164
1165    level0 = samp->level;
1166    width = texture->width[level0];
1167    height = texture->height[level0];
1168
1169    assert(width > 0);
1170    assert(height > 0);
1171
1172    addr.value = 0;
1173    addr.bits.level = samp->level;
1174
1175    samp->linear_texcoord_s(s, width,  x0, x1, xw);
1176    samp->linear_texcoord_t(t, height, y0, y1, yw);
1177
1178    for (j = 0; j < QUAD_SIZE; j++) {
1179       union tex_tile_address addrj = face(addr, faces[j]);
1180       const float *tx0 = get_texel_2d(samp, addrj, x0[j], y0[j]);
1181       const float *tx1 = get_texel_2d(samp, addrj, x1[j], y0[j]);
1182       const float *tx2 = get_texel_2d(samp, addrj, x0[j], y1[j]);
1183       const float *tx3 = get_texel_2d(samp, addrj, x1[j], y1[j]);
1184       int c;
1185
1186       /* interpolate R, G, B, A */
1187       for (c = 0; c < 4; c++) {
1188          rgba[c][j] = lerp_2d(xw[j], yw[j],
1189                               tx0[c], tx1[c],
1190                               tx2[c], tx3[c]);
1191       }
1192    }
1193 }
1194
1195
1196 static void
1197 img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
1198                      const float s[QUAD_SIZE],
1199                      const float t[QUAD_SIZE],
1200                      const float p[QUAD_SIZE],
1201                      float lodbias,
1202                      float rgba[NUM_CHANNELS][QUAD_SIZE])
1203 {
1204    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1205    const struct pipe_texture *texture = samp->texture;
1206    unsigned level0, j;
1207    int width, height, depth;
1208    int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
1209    float xw[4], yw[4], zw[4]; /* interpolation weights */
1210    union tex_tile_address addr;
1211
1212    level0 = samp->level;
1213    width = texture->width[level0];
1214    height = texture->height[level0];
1215    depth = texture->depth[level0];
1216
1217    addr.value = 0;
1218    addr.bits.level = level0;
1219
1220    assert(width > 0);
1221    assert(height > 0);
1222    assert(depth > 0);
1223
1224    samp->linear_texcoord_s(s, width,  x0, x1, xw);
1225    samp->linear_texcoord_t(t, height, y0, y1, yw);
1226    samp->linear_texcoord_p(p, depth,  z0, z1, zw);
1227
1228    for (j = 0; j < QUAD_SIZE; j++) {
1229       int c;
1230
1231       const float *tx00 = get_texel_3d(samp, addr, x0[j], y0[j], z0[j]);
1232       const float *tx01 = get_texel_3d(samp, addr, x1[j], y0[j], z0[j]);
1233       const float *tx02 = get_texel_3d(samp, addr, x0[j], y1[j], z0[j]);
1234       const float *tx03 = get_texel_3d(samp, addr, x1[j], y1[j], z0[j]);
1235
1236       const float *tx10 = get_texel_3d(samp, addr, x0[j], y0[j], z1[j]);
1237       const float *tx11 = get_texel_3d(samp, addr, x1[j], y0[j], z1[j]);
1238       const float *tx12 = get_texel_3d(samp, addr, x0[j], y1[j], z1[j]);
1239       const float *tx13 = get_texel_3d(samp, addr, x1[j], y1[j], z1[j]);
1240
1241       /* interpolate R, G, B, A */
1242       for (c = 0; c < 4; c++) {
1243          rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j],
1244                               tx00[c], tx01[c],
1245                               tx02[c], tx03[c],
1246                               tx10[c], tx11[c],
1247                               tx12[c], tx13[c]);
1248       }
1249    }
1250 }
1251
1252
1253 static void
1254 mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
1255                   const float s[QUAD_SIZE],
1256                   const float t[QUAD_SIZE],
1257                   const float p[QUAD_SIZE],
1258                   float lodbias,
1259                   float rgba[NUM_CHANNELS][QUAD_SIZE])
1260 {
1261    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1262    const struct pipe_texture *texture = samp->texture;
1263    int level0;
1264    float lambda;
1265
1266    lambda = samp->compute_lambda(samp, s, t, p, lodbias);
1267    level0 = (int)lambda;
1268
1269    if (lambda < 0.0) {
1270       samp->level = 0;
1271       samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1272    }
1273    else if (level0 >= texture->last_level) {
1274       samp->level = texture->last_level;
1275       samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1276    }
1277    else {
1278       float levelBlend = lambda - level0;
1279       float rgba0[4][4];
1280       float rgba1[4][4];
1281       int c,j;
1282
1283       samp->level = level0;
1284       samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba0 );
1285
1286       samp->level = level0+1;
1287       samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba1 );
1288
1289       for (j = 0; j < QUAD_SIZE; j++) {
1290          for (c = 0; c < 4; c++) {
1291             rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
1292          }
1293       }
1294    }
1295 }
1296
1297
1298 static void
1299 mip_filter_nearest(struct tgsi_sampler *tgsi_sampler,
1300                    const float s[QUAD_SIZE],
1301                    const float t[QUAD_SIZE],
1302                    const float p[QUAD_SIZE],
1303                    float lodbias,
1304                    float rgba[NUM_CHANNELS][QUAD_SIZE])
1305 {
1306    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1307    const struct pipe_texture *texture = samp->texture;
1308    float lambda;
1309
1310    lambda = samp->compute_lambda(samp, s, t, p, lodbias);
1311
1312    if (lambda < 0.0) {
1313       samp->level = 0;
1314       samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1315    }
1316    else {
1317       samp->level = (int)(lambda + 0.5) ;
1318       samp->level = MIN2(samp->level, (int)texture->last_level);
1319       samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1320    }
1321
1322 #if 0
1323    printf("RGBA %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n",
1324           rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0],
1325           rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1],
1326           rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2],
1327           rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]);
1328 #endif
1329 }
1330
1331
1332 static void
1333 mip_filter_none(struct tgsi_sampler *tgsi_sampler,
1334                 const float s[QUAD_SIZE],
1335                 const float t[QUAD_SIZE],
1336                 const float p[QUAD_SIZE],
1337                 float lodbias,
1338                 float rgba[NUM_CHANNELS][QUAD_SIZE])
1339 {
1340    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1341    float lambda = samp->compute_lambda(samp, s, t, p, lodbias);
1342
1343    if (lambda < 0.0) {
1344       samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1345    }
1346    else {
1347       samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1348    }
1349 }
1350
1351
1352
1353 /**
1354  * Specialized version of mip_filter_linear with hard-wired calls to
1355  * 2d lambda calculation and 2d_linear_repeat_POT img filters.
1356  */
1357 static void
1358 mip_filter_linear_2d_linear_repeat_POT(
1359    struct tgsi_sampler *tgsi_sampler,
1360    const float s[QUAD_SIZE],
1361    const float t[QUAD_SIZE],
1362    const float p[QUAD_SIZE],
1363    float lodbias,
1364    float rgba[NUM_CHANNELS][QUAD_SIZE])
1365 {
1366    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1367    const struct pipe_texture *texture = samp->texture;
1368    int level0;
1369    float lambda;
1370
1371    lambda = compute_lambda_2d(samp, s, t, p, lodbias);
1372    level0 = (int)lambda;
1373
1374    /* Catches both negative and large values of level0:
1375     */
1376    if ((unsigned)level0 >= texture->last_level) {
1377       if (level0 < 0)
1378          samp->level = 0;
1379       else
1380          samp->level = texture->last_level;
1381
1382       img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba );
1383    }
1384    else {
1385       float levelBlend = lambda - level0;
1386       float rgba0[4][4];
1387       float rgba1[4][4];
1388       int c,j;
1389
1390       samp->level = level0;
1391       img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba0 );
1392
1393       samp->level = level0+1;
1394       img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba1 );
1395
1396       for (j = 0; j < QUAD_SIZE; j++) {
1397          for (c = 0; c < 4; c++) {
1398             rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
1399          }
1400       }
1401    }
1402 }
1403
1404
1405
1406 /**
1407  * Do shadow/depth comparisons.
1408  */
1409 static void
1410 sample_compare(struct tgsi_sampler *tgsi_sampler,
1411                const float s[QUAD_SIZE],
1412                const float t[QUAD_SIZE],
1413                const float p[QUAD_SIZE],
1414                float lodbias,
1415                float rgba[NUM_CHANNELS][QUAD_SIZE])
1416 {
1417    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1418    const struct pipe_sampler_state *sampler = samp->sampler;
1419    int j, k0, k1, k2, k3;
1420    float val;
1421
1422    samp->mip_filter( tgsi_sampler, s, t, p, lodbias, rgba );
1423
1424    /**
1425     * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
1426     * When we sampled the depth texture, the depth value was put into all
1427     * RGBA channels.  We look at the red channel here.
1428     */
1429
1430    /* compare four texcoords vs. four texture samples */
1431    switch (sampler->compare_func) {
1432    case PIPE_FUNC_LESS:
1433       k0 = p[0] < rgba[0][0];
1434       k1 = p[1] < rgba[0][1];
1435       k2 = p[2] < rgba[0][2];
1436       k3 = p[3] < rgba[0][3];
1437       break;
1438    case PIPE_FUNC_LEQUAL:
1439       k0 = p[0] <= rgba[0][0];
1440       k1 = p[1] <= rgba[0][1];
1441       k2 = p[2] <= rgba[0][2];
1442       k3 = p[3] <= rgba[0][3];
1443       break;
1444    case PIPE_FUNC_GREATER:
1445       k0 = p[0] > rgba[0][0];
1446       k1 = p[1] > rgba[0][1];
1447       k2 = p[2] > rgba[0][2];
1448       k3 = p[3] > rgba[0][3];
1449       break;
1450    case PIPE_FUNC_GEQUAL:
1451       k0 = p[0] >= rgba[0][0];
1452       k1 = p[1] >= rgba[0][1];
1453       k2 = p[2] >= rgba[0][2];
1454       k3 = p[3] >= rgba[0][3];
1455       break;
1456    case PIPE_FUNC_EQUAL:
1457       k0 = p[0] == rgba[0][0];
1458       k1 = p[1] == rgba[0][1];
1459       k2 = p[2] == rgba[0][2];
1460       k3 = p[3] == rgba[0][3];
1461       break;
1462    case PIPE_FUNC_NOTEQUAL:
1463       k0 = p[0] != rgba[0][0];
1464       k1 = p[1] != rgba[0][1];
1465       k2 = p[2] != rgba[0][2];
1466       k3 = p[3] != rgba[0][3];
1467       break;
1468    case PIPE_FUNC_ALWAYS:
1469       k0 = k1 = k2 = k3 = 1;
1470       break;
1471    case PIPE_FUNC_NEVER:
1472       k0 = k1 = k2 = k3 = 0;
1473       break;
1474    default:
1475       k0 = k1 = k2 = k3 = 0;
1476       assert(0);
1477       break;
1478    }
1479
1480    /* convert four pass/fail values to an intensity in [0,1] */
1481    val = 0.25F * (k0 + k1 + k2 + k3);
1482
1483    /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
1484    for (j = 0; j < 4; j++) {
1485       rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
1486       rgba[3][j] = 1.0F;
1487    }
1488 }
1489
1490
1491 /**
1492  * Compute which cube face is referenced by each texcoord and put that
1493  * info into the sampler faces[] array.  Then sample the cube faces
1494  */
1495 static void
1496 sample_cube(struct tgsi_sampler *tgsi_sampler,
1497             const float s[QUAD_SIZE],
1498             const float t[QUAD_SIZE],
1499             const float p[QUAD_SIZE],
1500             float lodbias,
1501             float rgba[NUM_CHANNELS][QUAD_SIZE])
1502 {
1503    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1504    unsigned j;
1505    float ssss[4], tttt[4];
1506
1507    /*
1508      major axis
1509      direction     target                             sc     tc    ma
1510      ----------    -------------------------------    ---    ---   ---
1511      +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
1512      -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
1513      +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
1514      -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
1515      +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
1516      -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
1517    */
1518    for (j = 0; j < QUAD_SIZE; j++) {
1519       float rx = s[j];
1520       float ry = t[j];
1521       float rz = p[j];
1522       const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
1523       unsigned face;
1524       float sc, tc, ma;
1525
1526       if (arx >= ary && arx >= arz) {
1527          if (rx >= 0.0F) {
1528             face = PIPE_TEX_FACE_POS_X;
1529             sc = -rz;
1530             tc = -ry;
1531             ma = arx;
1532          }
1533          else {
1534             face = PIPE_TEX_FACE_NEG_X;
1535             sc = rz;
1536             tc = -ry;
1537             ma = arx;
1538          }
1539       }
1540       else if (ary >= arx && ary >= arz) {
1541          if (ry >= 0.0F) {
1542             face = PIPE_TEX_FACE_POS_Y;
1543             sc = rx;
1544             tc = rz;
1545             ma = ary;
1546          }
1547          else {
1548             face = PIPE_TEX_FACE_NEG_Y;
1549             sc = rx;
1550             tc = -rz;
1551             ma = ary;
1552          }
1553       }
1554       else {
1555          if (rz > 0.0F) {
1556             face = PIPE_TEX_FACE_POS_Z;
1557             sc = rx;
1558             tc = -ry;
1559             ma = arz;
1560          }
1561          else {
1562             face = PIPE_TEX_FACE_NEG_Z;
1563             sc = -rx;
1564             tc = -ry;
1565             ma = arz;
1566          }
1567       }
1568
1569       {
1570          const float ima = 1.0 / ma;
1571          ssss[j] = ( sc * ima + 1.0F ) * 0.5F;
1572          tttt[j] = ( tc * ima + 1.0F ) * 0.5F;
1573          samp->faces[j] = face;
1574       }
1575    }
1576
1577    /* In our little pipeline, the compare stage is next.  If compare
1578     * is not active, this will point somewhere deeper into the
1579     * pipeline, eg. to mip_filter or even img_filter.
1580     */
1581    samp->compare(tgsi_sampler, ssss, tttt, NULL, lodbias, rgba);
1582 }
1583
1584
1585
1586 static wrap_nearest_func
1587 get_nearest_unorm_wrap(unsigned mode)
1588 {
1589    switch (mode) {
1590    case PIPE_TEX_WRAP_CLAMP:
1591       return wrap_nearest_unorm_clamp;
1592    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1593    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1594       return wrap_nearest_unorm_clamp_to_border;
1595    default:
1596       assert(0);
1597       return wrap_nearest_unorm_clamp;
1598    }
1599 }
1600
1601
1602 static wrap_nearest_func
1603 get_nearest_wrap(unsigned mode)
1604 {
1605    switch (mode) {
1606    case PIPE_TEX_WRAP_REPEAT:
1607       return wrap_nearest_repeat;
1608    case PIPE_TEX_WRAP_CLAMP:
1609       return wrap_nearest_clamp;
1610    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1611       return wrap_nearest_clamp_to_edge;
1612    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1613       return wrap_nearest_clamp_to_border;
1614    case PIPE_TEX_WRAP_MIRROR_REPEAT:
1615       return wrap_nearest_mirror_repeat;
1616    case PIPE_TEX_WRAP_MIRROR_CLAMP:
1617       return wrap_nearest_mirror_clamp;
1618    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1619       return wrap_nearest_mirror_clamp_to_edge;
1620    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1621       return wrap_nearest_mirror_clamp_to_border;
1622    default:
1623       assert(0);
1624       return wrap_nearest_repeat;
1625    }
1626 }
1627
1628
1629 static wrap_linear_func
1630 get_linear_unorm_wrap(unsigned mode)
1631 {
1632    switch (mode) {
1633    case PIPE_TEX_WRAP_CLAMP:
1634       return wrap_linear_unorm_clamp;
1635    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1636    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1637       return wrap_linear_unorm_clamp_to_border;
1638    default:
1639       assert(0);
1640       return wrap_linear_unorm_clamp;
1641    }
1642 }
1643
1644
1645 static wrap_linear_func
1646 get_linear_wrap(unsigned mode)
1647 {
1648    switch (mode) {
1649    case PIPE_TEX_WRAP_REPEAT:
1650       return wrap_linear_repeat;
1651    case PIPE_TEX_WRAP_CLAMP:
1652       return wrap_linear_clamp;
1653    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1654       return wrap_linear_clamp_to_edge;
1655    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1656       return wrap_linear_clamp_to_border;
1657    case PIPE_TEX_WRAP_MIRROR_REPEAT:
1658       return wrap_linear_mirror_repeat;
1659    case PIPE_TEX_WRAP_MIRROR_CLAMP:
1660       return wrap_linear_mirror_clamp;
1661    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1662       return wrap_linear_mirror_clamp_to_edge;
1663    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1664       return wrap_linear_mirror_clamp_to_border;
1665    default:
1666       assert(0);
1667       return wrap_linear_repeat;
1668    }
1669 }
1670
1671
1672 static compute_lambda_func
1673 get_lambda_func(const union sp_sampler_key key)
1674 {
1675    if (key.bits.processor == TGSI_PROCESSOR_VERTEX)
1676       return compute_lambda_vert;
1677
1678    switch (key.bits.target) {
1679    case PIPE_TEXTURE_1D:
1680       return compute_lambda_1d;
1681    case PIPE_TEXTURE_2D:
1682    case PIPE_TEXTURE_CUBE:
1683       return compute_lambda_2d;
1684    case PIPE_TEXTURE_3D:
1685       return compute_lambda_3d;
1686    default:
1687       assert(0);
1688       return compute_lambda_1d;
1689    }
1690 }
1691
1692
1693 static filter_func
1694 get_img_filter(const union sp_sampler_key key,
1695                unsigned filter,
1696                const struct pipe_sampler_state *sampler)
1697 {
1698    switch (key.bits.target) {
1699    case PIPE_TEXTURE_1D:
1700       if (filter == PIPE_TEX_FILTER_NEAREST)
1701          return img_filter_1d_nearest;
1702       else
1703          return img_filter_1d_linear;
1704       break;
1705    case PIPE_TEXTURE_2D:
1706       /* Try for fast path:
1707        */
1708       if (key.bits.is_pot &&
1709           sampler->wrap_s == sampler->wrap_t &&
1710           sampler->normalized_coords)
1711       {
1712          switch (sampler->wrap_s) {
1713          case PIPE_TEX_WRAP_REPEAT:
1714             switch (filter) {
1715             case PIPE_TEX_FILTER_NEAREST:
1716                return img_filter_2d_nearest_repeat_POT;
1717             case PIPE_TEX_FILTER_LINEAR:
1718                return img_filter_2d_linear_repeat_POT;
1719             default:
1720                break;
1721             }
1722             break;
1723          case PIPE_TEX_WRAP_CLAMP:
1724             switch (filter) {
1725             case PIPE_TEX_FILTER_NEAREST:
1726                return img_filter_2d_nearest_clamp_POT;
1727             default:
1728                break;
1729             }
1730          }
1731       }
1732       /* Otherwise use default versions:
1733        */
1734       if (filter == PIPE_TEX_FILTER_NEAREST)
1735          return img_filter_2d_nearest;
1736       else
1737          return img_filter_2d_linear;
1738       break;
1739    case PIPE_TEXTURE_CUBE:
1740       if (filter == PIPE_TEX_FILTER_NEAREST)
1741          return img_filter_cube_nearest;
1742       else
1743          return img_filter_cube_linear;
1744       break;
1745    case PIPE_TEXTURE_3D:
1746       if (filter == PIPE_TEX_FILTER_NEAREST)
1747          return img_filter_3d_nearest;
1748       else
1749          return img_filter_3d_linear;
1750       break;
1751    default:
1752       assert(0);
1753       return img_filter_1d_nearest;
1754    }
1755 }
1756
1757
1758 /**
1759  * Bind the given texture object and texture cache to the sampler varient.
1760  */
1761 void
1762 sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp,
1763                                  struct softpipe_tex_tile_cache *tex_cache,
1764                                  const struct pipe_texture *texture )
1765 {
1766    const struct pipe_sampler_state *sampler = samp->sampler;
1767
1768    samp->texture = texture;
1769    samp->cache = tex_cache;
1770    samp->xpot = util_unsigned_logbase2( texture->width[0] );
1771    samp->ypot = util_unsigned_logbase2( texture->height[0] );
1772    samp->level = CLAMP((int) sampler->min_lod, 0, (int) texture->last_level);
1773 }
1774
1775
/**
 * Release a sampler varient by passing it to FREE().  The texture, cache
 * and sampler state it points at are not touched here.
 */
void
sp_sampler_varient_destroy( struct sp_sampler_varient *samp )
{
   FREE(samp);
}
1781
1782
1783 /**
1784  * Create a sampler varient for a given set of non-orthogonal state.
1785  */
1786 struct sp_sampler_varient *
1787 sp_create_sampler_varient( const struct pipe_sampler_state *sampler,
1788                            const union sp_sampler_key key )
1789 {
1790    struct sp_sampler_varient *samp = CALLOC_STRUCT(sp_sampler_varient);
1791    if (!samp)
1792       return NULL;
1793
1794    samp->sampler = sampler;
1795    samp->key = key;
1796
1797    /* Note that (for instance) linear_texcoord_s and
1798     * nearest_texcoord_s may be active at the same time, if the
1799     * sampler min_img_filter differs from its mag_img_filter.
1800     */
1801    if (sampler->normalized_coords) {
1802       samp->linear_texcoord_s = get_linear_wrap( sampler->wrap_s );
1803       samp->linear_texcoord_t = get_linear_wrap( sampler->wrap_t );
1804       samp->linear_texcoord_p = get_linear_wrap( sampler->wrap_r );
1805
1806       samp->nearest_texcoord_s = get_nearest_wrap( sampler->wrap_s );
1807       samp->nearest_texcoord_t = get_nearest_wrap( sampler->wrap_t );
1808       samp->nearest_texcoord_p = get_nearest_wrap( sampler->wrap_r );
1809    }
1810    else {
1811       samp->linear_texcoord_s = get_linear_unorm_wrap( sampler->wrap_s );
1812       samp->linear_texcoord_t = get_linear_unorm_wrap( sampler->wrap_t );
1813       samp->linear_texcoord_p = get_linear_unorm_wrap( sampler->wrap_r );
1814
1815       samp->nearest_texcoord_s = get_nearest_unorm_wrap( sampler->wrap_s );
1816       samp->nearest_texcoord_t = get_nearest_unorm_wrap( sampler->wrap_t );
1817       samp->nearest_texcoord_p = get_nearest_unorm_wrap( sampler->wrap_r );
1818    }
1819
1820    samp->compute_lambda = get_lambda_func( key );
1821
1822    samp->min_img_filter = get_img_filter(key, sampler->min_img_filter, sampler);
1823    samp->mag_img_filter = get_img_filter(key, sampler->mag_img_filter, sampler);
1824
1825    switch (sampler->min_mip_filter) {
1826    case PIPE_TEX_MIPFILTER_NONE:
1827       if (sampler->min_img_filter == sampler->mag_img_filter)
1828          samp->mip_filter = samp->min_img_filter;
1829       else
1830          samp->mip_filter = mip_filter_none;
1831       break;
1832
1833    case PIPE_TEX_MIPFILTER_NEAREST:
1834       samp->mip_filter = mip_filter_nearest;
1835       break;
1836
1837    case PIPE_TEX_MIPFILTER_LINEAR:
1838       if (key.bits.is_pot &&
1839           sampler->min_img_filter == sampler->mag_img_filter &&
1840           sampler->normalized_coords &&
1841           sampler->wrap_s == PIPE_TEX_WRAP_REPEAT &&
1842           sampler->wrap_t == PIPE_TEX_WRAP_REPEAT &&
1843           sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR)
1844       {
1845          samp->mip_filter = mip_filter_linear_2d_linear_repeat_POT;
1846       }
1847       else
1848       {
1849          samp->mip_filter = mip_filter_linear;
1850       }
1851       break;
1852    }
1853
1854    if (sampler->compare_mode != FALSE) {
1855       samp->compare = sample_compare;
1856    }
1857    else {
1858       /* Skip compare operation by promoting the mip_filter function
1859        * pointer:
1860        */
1861       samp->compare = samp->mip_filter;
1862    }
1863
1864    if (key.bits.target == PIPE_TEXTURE_CUBE) {
1865       samp->base.get_samples = sample_cube;
1866    }
1867    else {
1868       samp->faces[0] = 0;
1869       samp->faces[1] = 0;
1870       samp->faces[2] = 0;
1871       samp->faces[3] = 0;
1872
1873       /* Skip cube face determination by promoting the compare
1874        * function pointer:
1875        */
1876       samp->base.get_samples = samp->compare;
1877    }
1878
1879    return samp;
1880 }