src/gallium/drivers/softpipe/sp_tex_sample.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  * Copyright 2008 VMware, Inc.  All rights reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the
   9  * "Software"), to deal in the Software without restriction, including
  10  * without limitation the rights to use, copy, modify, merge, publish,
  11  * distribute, sub license, and/or sell copies of the Software, and to
  12  * permit persons to whom the Software is furnished to do so, subject to
  13  * the following conditions:
  14  *
  15  * The above copyright notice and this permission notice (including the
  16  * next paragraph) shall be included in all copies or substantial portions
  17  * of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26  *
  27  **************************************************************************/
  28
  29 /**
  30  * Texture sampling
  31  *
  32  * Authors:
  33  *   Brian Paul
  34  *   Keith Whitwell
  35  */
  36
  37 #include "pipe/p_context.h"
  38 #include "pipe/p_defines.h"
  39 #include "pipe/p_shader_tokens.h"
  40 #include "util/u_math.h"
  41 #include "util/u_memory.h"
  42 #include "sp_quad.h"   /* only for #define QUAD_* tokens */
  43 #include "sp_tex_sample.h"
  44 #include "sp_tex_tile_cache.h"
  45
  46
  47
  48 /*
  49  * Return fractional part of 'f'.  Used for computing interpolation weights.
  50  * Need to be careful with negative values.
  51  * Note, if this function isn't perfect you'll sometimes see 1-pixel bands
  52  * of improperly weighted linear-filtered textures.
  53  * The tests/texwrap.c demo is a good test.
  54  */
  55 static INLINE float
  56 frac(float f)
  57 {
  58    return f - util_ifloor(f);
  59 }
  60
  61
  62
  63 /**
  64  * Linear interpolation macro
  65  */
  66 static INLINE float
  67 lerp(float a, float v0, float v1)
  68 {
  69    return v0 + a * (v1 - v0);
  70 }
  71
  72
  73 /**
  74  * Do 2D/biliner interpolation of float values.
  75  * v00, v10, v01 and v11 are typically four texture samples in a square/box.
  76  * a and b are the horizontal and vertical interpolants.
  77  * It's important that this function is inlined when compiled with
  78  * optimization!  If we find that's not true on some systems, convert
  79  * to a macro.
  80  */
  81 static INLINE float
  82 lerp_2d(float a, float b,
  83         float v00, float v10, float v01, float v11)
  84 {
  85    const float temp0 = lerp(a, v00, v10);
  86    const float temp1 = lerp(a, v01, v11);
  87    return lerp(b, temp0, temp1);
  88 }
  89
  90
  91 /**
  92  * As above, but 3D interpolation of 8 values.
  93  */
  94 static INLINE float
  95 lerp_3d(float a, float b, float c,
  96         float v000, float v100, float v010, float v110,
  97         float v001, float v101, float v011, float v111)
  98 {
  99    const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
 100    const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
 101    return lerp(c, temp0, temp1);
 102 }
 103
 104
 105
 106 /**
 107  * Compute coord % size for repeat wrap modes.
 108  * Note that if coord is a signed integer, coord % size doesn't give
 109  * the right value for coord < 0 (in terms of texture repeat).  Just
 110  * casting to unsigned fixes that.
 111  */
 112 static INLINE int
 113 repeat(int coord, unsigned size)
 114 {
 115    return (int) ((unsigned) coord % size);
 116 }
 117
 118
 119 /**
 120  * Apply texture coord wrapping mode and return integer texture indexes
 121  * for a vector of four texcoords (S or T or P).
 122  * \param wrapMode  PIPE_TEX_WRAP_x
 123  * \param s  the incoming texcoords
 124  * \param size  the texture image size
 125  * \param icoord  returns the integer texcoords
 126  * \return  integer texture index
 127  */
 128 static void
 129 wrap_nearest_repeat(const float s[4], unsigned size, int icoord[4])
 130 {
 131    uint ch;
 132    /* s limited to [0,1) */
 133    /* i limited to [0,size-1] */
 134    for (ch = 0; ch < 4; ch++) {
 135       int i = util_ifloor(s[ch] * size);
 136       icoord[ch] = repeat(i, size);
 137    }
 138 }
 139
 140
 141 static void
 142 wrap_nearest_clamp(const float s[4], unsigned size, int icoord[4])
 143 {
 144    uint ch;
 145    /* s limited to [0,1] */
 146    /* i limited to [0,size-1] */
 147    for (ch = 0; ch < 4; ch++) {
 148       if (s[ch] <= 0.0F)
 149          icoord[ch] = 0;
 150       else if (s[ch] >= 1.0F)
 151          icoord[ch] = size - 1;
 152       else
 153          icoord[ch] = util_ifloor(s[ch] * size);
 154    }
 155 }
 156
 157
 158 static void
 159 wrap_nearest_clamp_to_edge(const float s[4], unsigned size, int icoord[4])
 160 {
 161    uint ch;
 162    /* s limited to [min,max] */
 163    /* i limited to [0, size-1] */
 164    const float min = 1.0F / (2.0F * size);
 165    const float max = 1.0F - min;
 166    for (ch = 0; ch < 4; ch++) {
 167       if (s[ch] < min)
 168          icoord[ch] = 0;
 169       else if (s[ch] > max)
 170          icoord[ch] = size - 1;
 171       else
 172          icoord[ch] = util_ifloor(s[ch] * size);
 173    }
 174 }
 175
 176
 177 static void
 178 wrap_nearest_clamp_to_border(const float s[4], unsigned size, int icoord[4])
 179 {
 180    uint ch;
 181    /* s limited to [min,max] */
 182    /* i limited to [-1, size] */
 183    const float min = -1.0F / (2.0F * size);
 184    const float max = 1.0F - min;
 185    for (ch = 0; ch < 4; ch++) {
 186       if (s[ch] <= min)
 187          icoord[ch] = -1;
 188       else if (s[ch] >= max)
 189          icoord[ch] = size;
 190       else
 191          icoord[ch] = util_ifloor(s[ch] * size);
 192    }
 193 }
 194
 195
 196 static void
 197 wrap_nearest_mirror_repeat(const float s[4], unsigned size, int icoord[4])
 198 {
 199    uint ch;
 200    const float min = 1.0F / (2.0F * size);
 201    const float max = 1.0F - min;
 202    for (ch = 0; ch < 4; ch++) {
 203       const int flr = util_ifloor(s[ch]);
 204       float u;
 205       if (flr & 1)
 206          u = 1.0F - (s[ch] - (float) flr);
 207       else
 208          u = s[ch] - (float) flr;
 209       if (u < min)
 210          icoord[ch] = 0;
 211       else if (u > max)
 212          icoord[ch] = size - 1;
 213       else
 214          icoord[ch] = util_ifloor(u * size);
 215    }
 216 }
 217
 218
 219 static void
 220 wrap_nearest_mirror_clamp(const float s[4], unsigned size, int icoord[4])
 221 {
 222    uint ch;
 223    for (ch = 0; ch < 4; ch++) {
 224       /* s limited to [0,1] */
 225       /* i limited to [0,size-1] */
 226       const float u = fabsf(s[ch]);
 227       if (u <= 0.0F)
 228          icoord[ch] = 0;
 229       else if (u >= 1.0F)
 230          icoord[ch] = size - 1;
 231       else
 232          icoord[ch] = util_ifloor(u * size);
 233    }
 234 }
 235
 236
 237 static void
 238 wrap_nearest_mirror_clamp_to_edge(const float s[4], unsigned size,
 239                                   int icoord[4])
 240 {
 241    uint ch;
 242    /* s limited to [min,max] */
 243    /* i limited to [0, size-1] */
 244    const float min = 1.0F / (2.0F * size);
 245    const float max = 1.0F - min;
 246    for (ch = 0; ch < 4; ch++) {
 247       const float u = fabsf(s[ch]);
 248       if (u < min)
 249          icoord[ch] = 0;
 250       else if (u > max)
 251          icoord[ch] = size - 1;
 252       else
 253          icoord[ch] = util_ifloor(u * size);
 254    }
 255 }
 256
 257
 258 static void
 259 wrap_nearest_mirror_clamp_to_border(const float s[4], unsigned size,
 260                                     int icoord[4])
 261 {
 262    uint ch;
 263    /* s limited to [min,max] */
 264    /* i limited to [0, size-1] */
 265    const float min = -1.0F / (2.0F * size);
 266    const float max = 1.0F - min;
 267    for (ch = 0; ch < 4; ch++) {
 268       const float u = fabsf(s[ch]);
 269       if (u < min)
 270          icoord[ch] = -1;
 271       else if (u > max)
 272          icoord[ch] = size;
 273       else
 274          icoord[ch] = util_ifloor(u * size);
 275    }
 276 }
 277
 278
 279 /**
 280  * Used to compute texel locations for linear sampling for four texcoords.
 281  * \param wrapMode  PIPE_TEX_WRAP_x
 282  * \param s  the texcoords
 283  * \param size  the texture image size
 284  * \param icoord0  returns first texture indexes
 285  * \param icoord1  returns second texture indexes (usually icoord0 + 1)
 286  * \param w  returns blend factor/weight between texture indexes
 287  * \param icoord  returns the computed integer texture coords
 288  */
 289 static void
 290 wrap_linear_repeat(const float s[4], unsigned size,
 291                    int icoord0[4], int icoord1[4], float w[4])
 292 {
 293    uint ch;
 294    for (ch = 0; ch < 4; ch++) {
 295       float u = s[ch] * size - 0.5F;
 296       icoord0[ch] = repeat(util_ifloor(u), size);
 297       icoord1[ch] = repeat(icoord0[ch] + 1, size);
 298       w[ch] = frac(u);
 299    }
 300 }
 301
 302
 303 static void
 304 wrap_linear_clamp(const float s[4], unsigned size,
 305                   int icoord0[4], int icoord1[4], float w[4])
 306 {
 307    uint ch;
 308    for (ch = 0; ch < 4; ch++) {
 309       float u = CLAMP(s[ch], 0.0F, 1.0F);
 310       u = u * size - 0.5f;
 311       icoord0[ch] = util_ifloor(u);
 312       icoord1[ch] = icoord0[ch] + 1;
 313       w[ch] = frac(u);
 314    }
 315 }
 316
 317
 318 static void
 319 wrap_linear_clamp_to_edge(const float s[4], unsigned size,
 320                           int icoord0[4], int icoord1[4], float w[4])
 321 {
 322    uint ch;
 323    for (ch = 0; ch < 4; ch++) {
 324       float u = CLAMP(s[ch], 0.0F, 1.0F);
 325       u = u * size - 0.5f;
 326       icoord0[ch] = util_ifloor(u);
 327       icoord1[ch] = icoord0[ch] + 1;
 328       if (icoord0[ch] < 0)
 329          icoord0[ch] = 0;
 330       if (icoord1[ch] >= (int) size)
 331          icoord1[ch] = size - 1;
 332       w[ch] = frac(u);
 333    }
 334 }
 335
 336
 337 static void
 338 wrap_linear_clamp_to_border(const float s[4], unsigned size,
 339                             int icoord0[4], int icoord1[4], float w[4])
 340 {
 341    const float min = -1.0F / (2.0F * size);
 342    const float max = 1.0F - min;
 343    uint ch;
 344    for (ch = 0; ch < 4; ch++) {
 345       float u = CLAMP(s[ch], min, max);
 346       u = u * size - 0.5f;
 347       icoord0[ch] = util_ifloor(u);
 348       icoord1[ch] = icoord0[ch] + 1;
 349       w[ch] = frac(u);
 350    }
 351 }
 352
 353
 354 static void
 355 wrap_linear_mirror_repeat(const float s[4], unsigned size,
 356                           int icoord0[4], int icoord1[4], float w[4])
 357 {
 358    uint ch;
 359    for (ch = 0; ch < 4; ch++) {
 360       const int flr = util_ifloor(s[ch]);
 361       float u;
 362       if (flr & 1)
 363          u = 1.0F - (s[ch] - (float) flr);
 364       else
 365          u = s[ch] - (float) flr;
 366       u = u * size - 0.5F;
 367       icoord0[ch] = util_ifloor(u);
 368       icoord1[ch] = icoord0[ch] + 1;
 369       if (icoord0[ch] < 0)
 370          icoord0[ch] = 0;
 371       if (icoord1[ch] >= (int) size)
 372          icoord1[ch] = size - 1;
 373       w[ch] = frac(u);
 374    }
 375 }
 376
 377
 378 static void
 379 wrap_linear_mirror_clamp(const float s[4], unsigned size,
 380                          int icoord0[4], int icoord1[4], float w[4])
 381 {
 382    uint ch;
 383    for (ch = 0; ch < 4; ch++) {
 384       float u = fabsf(s[ch]);
 385       if (u >= 1.0F)
 386          u = (float) size;
 387       else
 388          u *= size;
 389       u -= 0.5F;
 390       icoord0[ch] = util_ifloor(u);
 391       icoord1[ch] = icoord0[ch] + 1;
 392       w[ch] = frac(u);
 393    }
 394 }
 395
 396
 397 static void
 398 wrap_linear_mirror_clamp_to_edge(const float s[4], unsigned size,
 399                                  int icoord0[4], int icoord1[4], float w[4])
 400 {
 401    uint ch;
 402    for (ch = 0; ch < 4; ch++) {
 403       float u = fabsf(s[ch]);
 404       if (u >= 1.0F)
 405          u = (float) size;
 406       else
 407          u *= size;
 408       u -= 0.5F;
 409       icoord0[ch] = util_ifloor(u);
 410       icoord1[ch] = icoord0[ch] + 1;
 411       if (icoord0[ch] < 0)
 412          icoord0[ch] = 0;
 413       if (icoord1[ch] >= (int) size)
 414          icoord1[ch] = size - 1;
 415       w[ch] = frac(u);
 416    }
 417 }
 418
 419
 420 static void
 421 wrap_linear_mirror_clamp_to_border(const float s[4], unsigned size,
 422                                    int icoord0[4], int icoord1[4], float w[4])
 423 {
 424    const float min = -1.0F / (2.0F * size);
 425    const float max = 1.0F - min;
 426    uint ch;
 427    for (ch = 0; ch < 4; ch++) {
 428       float u = fabsf(s[ch]);
 429       if (u <= min)
 430          u = min * size;
 431       else if (u >= max)
 432          u = max * size;
 433       else
 434          u *= size;
 435       u -= 0.5F;
 436       icoord0[ch] = util_ifloor(u);
 437       icoord1[ch] = icoord0[ch] + 1;
 438       w[ch] = frac(u);
 439    }
 440 }
 441
 442
 443 /**
 444  * For RECT textures / unnormalized texcoords
 445  * Only a subset of wrap modes supported.
 446  */
 447 static void
 448 wrap_nearest_unorm_clamp(const float s[4], unsigned size, int icoord[4])
 449 {
 450    uint ch;
 451    for (ch = 0; ch < 4; ch++) {
 452       int i = util_ifloor(s[ch]);
 453       icoord[ch]= CLAMP(i, 0, (int) size-1);
 454    }
 455 }
 456
 457
 458 /**
 459  * Handles clamp_to_edge and clamp_to_border:
 460  */
 461 static void
 462 wrap_nearest_unorm_clamp_to_border(const float s[4], unsigned size,
 463                                    int icoord[4])
 464 {
 465    uint ch;
 466    for (ch = 0; ch < 4; ch++) {
 467       icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) );
 468    }
 469 }
 470
 471
 472 /**
 473  * For RECT textures / unnormalized texcoords.
 474  * Only a subset of wrap modes supported.
 475  */
 476 static void
 477 wrap_linear_unorm_clamp(const float s[4], unsigned size,
 478                         int icoord0[4], int icoord1[4], float w[4])
 479 {
 480    uint ch;
 481    for (ch = 0; ch < 4; ch++) {
 482       /* Not exactly what the spec says, but it matches NVIDIA output */
 483       float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f);
 484       icoord0[ch] = util_ifloor(u);
 485       icoord1[ch] = icoord0[ch] + 1;
 486       w[ch] = frac(u);
 487    }
 488 }
 489
 490
 491 static void
 492 wrap_linear_unorm_clamp_to_border(const float s[4], unsigned size,
 493                                   int icoord0[4], int icoord1[4], float w[4])
 494 {
 495    uint ch;
 496    for (ch = 0; ch < 4; ch++) {
 497       float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F);
 498       u -= 0.5F;
 499       icoord0[ch] = util_ifloor(u);
 500       icoord1[ch] = icoord0[ch] + 1;
 501       if (icoord1[ch] > (int) size - 1)
 502          icoord1[ch] = size - 1;
 503       w[ch] = frac(u);
 504    }
 505 }
 506
 507
 508
 509 /**
 510  * Examine the quad's texture coordinates to compute the partial
 511  * derivatives w.r.t X and Y, then compute lambda (level of detail).
 512  */
 513 static float
 514 compute_lambda_1d(const struct sp_sampler_varient *samp,
 515                   const float s[QUAD_SIZE],
 516                   const float t[QUAD_SIZE],
 517                   const float p[QUAD_SIZE],
 518                   float lodbias)
 519 {
 520    const struct pipe_texture *texture = samp->texture;
 521    const struct pipe_sampler_state *sampler = samp->sampler;
 522    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 523    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 524    float rho = MAX2(dsdx, dsdy) * texture->width[0];
 525    float lambda;
 526
 527    lambda = util_fast_log2(rho);
 528    lambda += lodbias + sampler->lod_bias;
 529    lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
 530
 531    return lambda;
 532 }
 533
 534
 535 static float
 536 compute_lambda_2d(const struct sp_sampler_varient *samp,
 537                   const float s[QUAD_SIZE],
 538                   const float t[QUAD_SIZE],
 539                   const float p[QUAD_SIZE],
 540                   float lodbias)
 541 {
 542    const struct pipe_texture *texture = samp->texture;
 543    const struct pipe_sampler_state *sampler = samp->sampler;
 544    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 545    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 546    float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
 547    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
 548    float maxx = MAX2(dsdx, dsdy) * texture->width[0];
 549    float maxy = MAX2(dtdx, dtdy) * texture->height[0];
 550    float rho  = MAX2(maxx, maxy);
 551    float lambda;
 552
 553    lambda = util_fast_log2(rho);
 554    lambda += lodbias + sampler->lod_bias;
 555    lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
 556
 557    return lambda;
 558 }
 559
 560
 561 static float
 562 compute_lambda_3d(const struct sp_sampler_varient *samp,
 563                   const float s[QUAD_SIZE],
 564                   const float t[QUAD_SIZE],
 565                   const float p[QUAD_SIZE],
 566                   float lodbias)
 567 {
 568    const struct pipe_texture *texture = samp->texture;
 569    const struct pipe_sampler_state *sampler = samp->sampler;
 570    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 571    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 572    float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
 573    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
 574    float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
 575    float dpdy = fabsf(p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT]);
 576    float maxx = MAX2(dsdx, dsdy) * texture->width[0];
 577    float maxy = MAX2(dtdx, dtdy) * texture->height[0];
 578    float maxz = MAX2(dpdx, dpdy) * texture->depth[0];
 579    float rho, lambda;
 580
 581    rho = MAX2(maxx, maxy);
 582    rho = MAX2(rho, maxz);
 583
 584    lambda = util_fast_log2(rho);
 585    lambda += lodbias + sampler->lod_bias;
 586    lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
 587
 588    return lambda;
 589 }
 590
 591
 592 /**
 593  * Compute lambda for a vertex texture sampler.
 594  * Since there aren't derivatives to use, just return the LOD bias.
 595  */
 596 static float
 597 compute_lambda_vert(const struct sp_sampler_varient *samp,
 598                     const float s[QUAD_SIZE],
 599                     const float t[QUAD_SIZE],
 600                     const float p[QUAD_SIZE],
 601                     float lodbias)
 602 {
 603    return lodbias;
 604 }
 605
 606
 607
 608 /**
 609  * Get a texel from a texture, using the texture tile cache.
 610  *
 611  * \param addr  the template tex address containing cube, z, face info.
 612  * \param x  the x coord of texel within 2D image
 613  * \param y  the y coord of texel within 2D image
 614  * \param rgba  the quad to put the texel/color into
 615  *
 616  * XXX maybe move this into sp_tex_tile_cache.c and merge with the
 617  * sp_get_cached_tile_tex() function.  Also, get 4 texels instead of 1...
 618  */
 619
 620
 621
 622
 623 static INLINE const float *
 624 get_texel_2d_no_border(const struct sp_sampler_varient *samp,
 625                        union tex_tile_address addr, int x, int y)
 626 {
 627    const struct softpipe_tex_cached_tile *tile;
 628
 629    addr.bits.x = x / TILE_SIZE;
 630    addr.bits.y = y / TILE_SIZE;
 631    y %= TILE_SIZE;
 632    x %= TILE_SIZE;
 633
 634    tile = sp_get_cached_tile_tex(samp->cache, addr);
 635
 636    return &tile->data.color[y][x][0];
 637 }
 638
 639
 640 static INLINE const float *
 641 get_texel_2d(const struct sp_sampler_varient *samp,
 642              union tex_tile_address addr, int x, int y)
 643 {
 644    const struct pipe_texture *texture = samp->texture;
 645    unsigned level = addr.bits.level;
 646
 647    if (x < 0 || x >= (int) texture->width[level] ||
 648        y < 0 || y >= (int) texture->height[level]) {
 649       return samp->sampler->border_color;
 650    }
 651    else {
 652       return get_texel_2d_no_border( samp, addr, x, y );
 653    }
 654 }
 655
 656
 657 /* Gather a quad of adjacent texels within a tile:
 658  */
 659 static INLINE void
 660 get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_varient *samp,
 661                                         union tex_tile_address addr,
 662                                         unsigned x, unsigned y,
 663                                         const float *out[4])
 664 {
 665    const struct softpipe_tex_cached_tile *tile;
 666
 667    addr.bits.x = x / TILE_SIZE;
 668    addr.bits.y = y / TILE_SIZE;
 669    y %= TILE_SIZE;
 670    x %= TILE_SIZE;
 671
 672    tile = sp_get_cached_tile_tex(samp->cache, addr);
 673
 674    out[0] = &tile->data.color[y  ][x  ][0];
 675    out[1] = &tile->data.color[y  ][x+1][0];
 676    out[2] = &tile->data.color[y+1][x  ][0];
 677    out[3] = &tile->data.color[y+1][x+1][0];
 678 }
 679
 680
 681 /* Gather a quad of potentially non-adjacent texels:
 682  */
 683 static INLINE void
 684 get_texel_quad_2d_no_border(const struct sp_sampler_varient *samp,
 685                             union tex_tile_address addr,
 686                             int x0, int y0,
 687                             int x1, int y1,
 688                             const float *out[4])
 689 {
 690    out[0] = get_texel_2d_no_border( samp, addr, x0, y0 );
 691    out[1] = get_texel_2d_no_border( samp, addr, x1, y0 );
 692    out[2] = get_texel_2d_no_border( samp, addr, x0, y1 );
 693    out[3] = get_texel_2d_no_border( samp, addr, x1, y1 );
 694 }
 695
 696 /* Can involve a lot of unnecessary checks for border color:
 697  */
 698 static INLINE void
 699 get_texel_quad_2d(const struct sp_sampler_varient *samp,
 700                   union tex_tile_address addr,
 701                   int x0, int y0,
 702                   int x1, int y1,
 703                   const float *out[4])
 704 {
 705    out[0] = get_texel_2d( samp, addr, x0, y0 );
 706    out[1] = get_texel_2d( samp, addr, x1, y0 );
 707    out[3] = get_texel_2d( samp, addr, x1, y1 );
 708    out[2] = get_texel_2d( samp, addr, x0, y1 );
 709 }
 710
 711
 712
 713 /* 3d varients:
 714  */
 715 static INLINE const float *
 716 get_texel_3d_no_border(const struct sp_sampler_varient *samp,
 717                        union tex_tile_address addr, int x, int y, int z)
 718 {
 719    const struct softpipe_tex_cached_tile *tile;
 720
 721    addr.bits.x = x / TILE_SIZE;
 722    addr.bits.y = y / TILE_SIZE;
 723    addr.bits.z = z;
 724    y %= TILE_SIZE;
 725    x %= TILE_SIZE;
 726
 727    tile = sp_get_cached_tile_tex(samp->cache, addr);
 728
 729    return &tile->data.color[y][x][0];
 730 }
 731
 732
 733 static INLINE const float *
 734 get_texel_3d(const struct sp_sampler_varient *samp,
 735              union tex_tile_address addr, int x, int y, int z)
 736 {
 737    const struct pipe_texture *texture = samp->texture;
 738    unsigned level = addr.bits.level;
 739
 740    if (x < 0 || x >= (int) texture->width[level] ||
 741        y < 0 || y >= (int) texture->height[level] ||
 742        z < 0 || z >= (int) texture->depth[level]) {
 743       return samp->sampler->border_color;
 744    }
 745    else {
 746       return get_texel_3d_no_border( samp, addr, x, y, z );
 747    }
 748 }
 749
 750
 751 /**
 752  * Given the logbase2 of a mipmap's base level size and a mipmap level,
 753  * return the size (in texels) of that mipmap level.
 754  * For example, if level[0].width = 256 then base_pot will be 8.
 755  * If level = 2, then we'll return 64 (the width at level=2).
 756  * Return 1 if level > base_pot.
 757  */
 758 static INLINE unsigned
 759 pot_level_size(unsigned base_pot, unsigned level)
 760 {
 761    return (base_pot >= level) ? (1 << (base_pot - level)) : 1;
 762 }
 763
 764
 765 /* Some image-filter fastpaths:
 766  */
 767 static INLINE void
 768 img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
 769                                 const float s[QUAD_SIZE],
 770                                 const float t[QUAD_SIZE],
 771                                 const float p[QUAD_SIZE],
 772                                 float lodbias,
 773                                 float rgba[NUM_CHANNELS][QUAD_SIZE])
 774 {
 775    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 776    unsigned  j;
 777    unsigned level = samp->level;
 778    unsigned xpot = pot_level_size(samp->xpot, level);
 779    unsigned ypot = pot_level_size(samp->ypot, level);
 780    unsigned xmax = (xpot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, xpot) - 1; */
 781    unsigned ymax = (ypot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, ypot) - 1; */
 782    union tex_tile_address addr;
 783
 784    addr.value = 0;
 785    addr.bits.level = samp->level;
 786
 787    for (j = 0; j < QUAD_SIZE; j++) {
 788       int c;
 789
 790       float u = s[j] * xpot - 0.5F;
 791       float v = t[j] * ypot - 0.5F;
 792
 793       int uflr = util_ifloor(u);
 794       int vflr = util_ifloor(v);
 795
 796       float xw = u - (float)uflr;
 797       float yw = v - (float)vflr;
 798
 799       int x0 = uflr & (xpot - 1);
 800       int y0 = vflr & (ypot - 1);
 801
 802       const float *tx[4];
 803
 804       /* Can we fetch all four at once:
 805        */
 806       if (x0 < xmax && y0 < ymax) {
 807          get_texel_quad_2d_no_border_single_tile(samp, addr, x0, y0, tx);
 808       }
 809       else {
 810          unsigned x1 = (x0 + 1) & (xpot - 1);
 811          unsigned y1 = (y0 + 1) & (ypot - 1);
 812          get_texel_quad_2d_no_border(samp, addr, x0, y0, x1, y1, tx);
 813       }
 814
 815       /* interpolate R, G, B, A */
 816       for (c = 0; c < 4; c++) {
 817          rgba[c][j] = lerp_2d(xw, yw,
 818                               tx[0][c], tx[1][c],
 819                               tx[2][c], tx[3][c]);
 820       }
 821    }
 822 }
 823
 824
 825 static INLINE void
 826 img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
 827                                  const float s[QUAD_SIZE],
 828                                  const float t[QUAD_SIZE],
 829                                  const float p[QUAD_SIZE],
 830                                  float lodbias,
 831                                  float rgba[NUM_CHANNELS][QUAD_SIZE])
 832 {
 833    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 834    unsigned  j;
 835    unsigned level = samp->level;
 836    unsigned xpot = pot_level_size(samp->xpot, level);
 837    unsigned ypot = pot_level_size(samp->ypot, level);
 838    union tex_tile_address addr;
 839
 840    addr.value = 0;
 841    addr.bits.level = samp->level;
 842
 843    for (j = 0; j < QUAD_SIZE; j++) {
 844       int c;
 845
 846       float u = s[j] * xpot;
 847       float v = t[j] * ypot;
 848
 849       int uflr = util_ifloor(u);
 850       int vflr = util_ifloor(v);
 851
 852       int x0 = uflr & (xpot - 1);
 853       int y0 = vflr & (ypot - 1);
 854
 855       const float *out = get_texel_2d_no_border(samp, addr, x0, y0);
 856
 857       for (c = 0; c < 4; c++) {
 858          rgba[c][j] = out[c];
 859       }
 860    }
 861 }
 862
 863
 864 static INLINE void
 865 img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
 866                                 const float s[QUAD_SIZE],
 867                                 const float t[QUAD_SIZE],
 868                                 const float p[QUAD_SIZE],
 869                                 float lodbias,
 870                                 float rgba[NUM_CHANNELS][QUAD_SIZE])
 871 {
 872    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 873    unsigned  j;
 874    unsigned level = samp->level;
 875    unsigned xpot = pot_level_size(samp->xpot, level);
 876    unsigned ypot = pot_level_size(samp->ypot, level);
 877    union tex_tile_address addr;
 878
 879    addr.value = 0;
 880    addr.bits.level = samp->level;
 881
 882    for (j = 0; j < QUAD_SIZE; j++) {
 883       int c;
 884
 885       float u = s[j] * xpot;
 886       float v = t[j] * ypot;
 887
 888       int x0, y0;
 889       const float *out;
 890
 891       x0 = util_ifloor(u);
 892       if (x0 < 0)
 893          x0 = 0;
 894       else if (x0 > xpot - 1)
 895          x0 = xpot - 1;
 896
 897       y0 = util_ifloor(v);
 898       if (y0 < 0)
 899          y0 = 0;
 900       else if (y0 > ypot - 1)
 901          y0 = ypot - 1;
 902
 903       out = get_texel_2d_no_border(samp, addr, x0, y0);
 904
 905       for (c = 0; c < 4; c++) {
 906          rgba[c][j] = out[c];
 907       }
 908    }
 909 }
 910
 911
 912 static void
 913 img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
 914                         const float s[QUAD_SIZE],
 915                         const float t[QUAD_SIZE],
 916                         const float p[QUAD_SIZE],
 917                         float lodbias,
 918                         float rgba[NUM_CHANNELS][QUAD_SIZE])
 919 {
 920    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 921    const struct pipe_texture *texture = samp->texture;
 922    unsigned level0, j;
 923    int width;
 924    int x[4];
 925    union tex_tile_address addr;
 926
 927    level0 = samp->level;
 928    width = texture->width[level0];
 929
 930    assert(width > 0);
 931
 932    addr.value = 0;
 933    addr.bits.level = samp->level;
 934
 935    samp->nearest_texcoord_s(s, width, x);
 936
 937    for (j = 0; j < QUAD_SIZE; j++) {
 938       const float *out = get_texel_2d(samp, addr, x[j], 0);
 939       int c;
 940       for (c = 0; c < 4; c++) {
 941          rgba[c][j] = out[c];
 942       }
 943    }
 944 }
 945
 946
 947 static void
 948 img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
 949                       const float s[QUAD_SIZE],
 950                       const float t[QUAD_SIZE],
 951                       const float p[QUAD_SIZE],
 952                       float lodbias,
 953                       float rgba[NUM_CHANNELS][QUAD_SIZE])
 954 {
 955    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 956    const struct pipe_texture *texture = samp->texture;
 957    unsigned level0, j;
 958    int width, height;
 959    int x[4], y[4];
 960    union tex_tile_address addr;
 961
 962
 963    level0 = samp->level;
 964    width = texture->width[level0];
 965    height = texture->height[level0];
 966
 967    assert(width > 0);
 968    assert(height > 0);
 969
 970    addr.value = 0;
 971    addr.bits.level = samp->level;
 972
 973    samp->nearest_texcoord_s(s, width, x);
 974    samp->nearest_texcoord_t(t, height, y);
 975
 976    for (j = 0; j < QUAD_SIZE; j++) {
 977       const float *out = get_texel_2d(samp, addr, x[j], y[j]);
 978       int c;
 979       for (c = 0; c < 4; c++) {
 980          rgba[c][j] = out[c];
 981       }
 982    }
 983 }
 984
 985
 986 static INLINE union tex_tile_address
 987 face(union tex_tile_address addr, unsigned face )
 988 {
 989    addr.bits.face = face;
 990    return addr;
 991 }
 992
 993
 994 static void
 995 img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler,
 996                         const float s[QUAD_SIZE],
 997                         const float t[QUAD_SIZE],
 998                         const float p[QUAD_SIZE],
 999                         float lodbias,
1000                         float rgba[NUM_CHANNELS][QUAD_SIZE])
1001 {
1002    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1003    const struct pipe_texture *texture = samp->texture;
1004    const unsigned *faces = samp->faces; /* zero when not cube-mapping */
1005    unsigned level0, j;
1006    int width, height;
1007    int x[4], y[4];
1008    union tex_tile_address addr;
1009
1010    level0 = samp->level;
1011    width = texture->width[level0];
1012    height = texture->height[level0];
1013
1014    assert(width > 0);
1015    assert(height > 0);
1016
1017    addr.value = 0;
1018    addr.bits.level = samp->level;
1019
1020    samp->nearest_texcoord_s(s, width, x);
1021    samp->nearest_texcoord_t(t, height, y);
1022
1023    for (j = 0; j < QUAD_SIZE; j++) {
1024       const float *out = get_texel_2d(samp, face(addr, faces[j]), x[j], y[j]);
1025       int c;
1026       for (c = 0; c < 4; c++) {
1027          rgba[c][j] = out[c];
1028       }
1029    }
1030 }
1031
1032
1033 static void
1034 img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler,
1035                       const float s[QUAD_SIZE],
1036                       const float t[QUAD_SIZE],
1037                       const float p[QUAD_SIZE],
1038                       float lodbias,
1039                       float rgba[NUM_CHANNELS][QUAD_SIZE])
1040 {
1041    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1042    const struct pipe_texture *texture = samp->texture;
1043    unsigned level0, j;
1044    int width, height, depth;
1045    int x[4], y[4], z[4];
1046    union tex_tile_address addr;
1047
1048    level0 = samp->level;
1049    width = texture->width[level0];
1050    height = texture->height[level0];
1051    depth = texture->depth[level0];
1052
1053    assert(width > 0);
1054    assert(height > 0);
1055    assert(depth > 0);
1056
1057    samp->nearest_texcoord_s(s, width,  x);
1058    samp->nearest_texcoord_t(t, height, y);
1059    samp->nearest_texcoord_p(p, depth,  z);
1060
1061    addr.value = 0;
1062    addr.bits.level = samp->level;
1063
1064    for (j = 0; j < QUAD_SIZE; j++) {
1065       const float *out = get_texel_3d(samp, addr, x[j], y[j], z[j]);
1066       int c;
1067       for (c = 0; c < 4; c++) {
1068          rgba[c][j] = out[c];
1069       }
1070    }
1071 }
1072
1073
1074 static void
1075 img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
1076                      const float s[QUAD_SIZE],
1077                      const float t[QUAD_SIZE],
1078                      const float p[QUAD_SIZE],
1079                      float lodbias,
1080                      float rgba[NUM_CHANNELS][QUAD_SIZE])
1081 {
1082    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1083    const struct pipe_texture *texture = samp->texture;
1084    unsigned level0, j;
1085    int width;
1086    int x0[4], x1[4];
1087    float xw[4]; /* weights */
1088    union tex_tile_address addr;
1089
1090    level0 = samp->level;
1091    width = texture->width[level0];
1092
1093    assert(width > 0);
1094
1095    addr.value = 0;
1096    addr.bits.level = samp->level;
1097
1098    samp->linear_texcoord_s(s, width, x0, x1, xw);
1099
1100    for (j = 0; j < QUAD_SIZE; j++) {
1101       const float *tx0 = get_texel_2d(samp, addr, x0[j], 0);
1102       const float *tx1 = get_texel_2d(samp, addr, x1[j], 0);
1103       int c;
1104
1105       /* interpolate R, G, B, A */
1106       for (c = 0; c < 4; c++) {
1107          rgba[c][j] = lerp(xw[j], tx0[c], tx1[c]);
1108       }
1109    }
1110 }
1111
1112
1113 static void
1114 img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
1115                      const float s[QUAD_SIZE],
1116                      const float t[QUAD_SIZE],
1117                      const float p[QUAD_SIZE],
1118                      float lodbias,
1119                      float rgba[NUM_CHANNELS][QUAD_SIZE])
1120 {
1121    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1122    const struct pipe_texture *texture = samp->texture;
1123    unsigned level0, j;
1124    int width, height;
1125    int x0[4], y0[4], x1[4], y1[4];
1126    float xw[4], yw[4]; /* weights */
1127    union tex_tile_address addr;
1128
1129    level0 = samp->level;
1130    width = texture->width[level0];
1131    height = texture->height[level0];
1132
1133    assert(width > 0);
1134    assert(height > 0);
1135
1136    addr.value = 0;
1137    addr.bits.level = samp->level;
1138
1139    samp->linear_texcoord_s(s, width,  x0, x1, xw);
1140    samp->linear_texcoord_t(t, height, y0, y1, yw);
1141
1142    for (j = 0; j < QUAD_SIZE; j++) {
1143       const float *tx0 = get_texel_2d(samp, addr, x0[j], y0[j]);
1144       const float *tx1 = get_texel_2d(samp, addr, x1[j], y0[j]);
1145       const float *tx2 = get_texel_2d(samp, addr, x0[j], y1[j]);
1146       const float *tx3 = get_texel_2d(samp, addr, x1[j], y1[j]);
1147       int c;
1148
1149       /* interpolate R, G, B, A */
1150       for (c = 0; c < 4; c++) {
1151          rgba[c][j] = lerp_2d(xw[j], yw[j],
1152                               tx0[c], tx1[c],
1153                               tx2[c], tx3[c]);
1154       }
1155    }
1156 }
1157
1158
1159 static void
1160 img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
1161                        const float s[QUAD_SIZE],
1162                        const float t[QUAD_SIZE],
1163                        const float p[QUAD_SIZE],
1164                        float lodbias,
1165                        float rgba[NUM_CHANNELS][QUAD_SIZE])
1166 {
1167    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1168    const struct pipe_texture *texture = samp->texture;
1169    const unsigned *faces = samp->faces; /* zero when not cube-mapping */
1170    unsigned level0, j;
1171    int width, height;
1172    int x0[4], y0[4], x1[4], y1[4];
1173    float xw[4], yw[4]; /* weights */
1174    union tex_tile_address addr;
1175
1176    level0 = samp->level;
1177    width = texture->width[level0];
1178    height = texture->height[level0];
1179
1180    assert(width > 0);
1181    assert(height > 0);
1182
1183    addr.value = 0;
1184    addr.bits.level = samp->level;
1185
1186    samp->linear_texcoord_s(s, width,  x0, x1, xw);
1187    samp->linear_texcoord_t(t, height, y0, y1, yw);
1188
1189    for (j = 0; j < QUAD_SIZE; j++) {
1190       union tex_tile_address addrj = face(addr, faces[j]);
1191       const float *tx0 = get_texel_2d(samp, addrj, x0[j], y0[j]);
1192       const float *tx1 = get_texel_2d(samp, addrj, x1[j], y0[j]);
1193       const float *tx2 = get_texel_2d(samp, addrj, x0[j], y1[j]);
1194       const float *tx3 = get_texel_2d(samp, addrj, x1[j], y1[j]);
1195       int c;
1196
1197       /* interpolate R, G, B, A */
1198       for (c = 0; c < 4; c++) {
1199          rgba[c][j] = lerp_2d(xw[j], yw[j],
1200                               tx0[c], tx1[c],
1201                               tx2[c], tx3[c]);
1202       }
1203    }
1204 }
1205
1206
1207 static void
1208 img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
1209                      const float s[QUAD_SIZE],
1210                      const float t[QUAD_SIZE],
1211                      const float p[QUAD_SIZE],
1212                      float lodbias,
1213                      float rgba[NUM_CHANNELS][QUAD_SIZE])
1214 {
1215    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1216    const struct pipe_texture *texture = samp->texture;
1217    unsigned level0, j;
1218    int width, height, depth;
1219    int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
1220    float xw[4], yw[4], zw[4]; /* interpolation weights */
1221    union tex_tile_address addr;
1222
1223    level0 = samp->level;
1224    width = texture->width[level0];
1225    height = texture->height[level0];
1226    depth = texture->depth[level0];
1227
1228    addr.value = 0;
1229    addr.bits.level = level0;
1230
1231    assert(width > 0);
1232    assert(height > 0);
1233    assert(depth > 0);
1234
1235    samp->linear_texcoord_s(s, width,  x0, x1, xw);
1236    samp->linear_texcoord_t(t, height, y0, y1, yw);
1237    samp->linear_texcoord_p(p, depth,  z0, z1, zw);
1238
1239    for (j = 0; j < QUAD_SIZE; j++) {
1240       int c;
1241
1242       const float *tx00 = get_texel_3d(samp, addr, x0[j], y0[j], z0[j]);
1243       const float *tx01 = get_texel_3d(samp, addr, x1[j], y0[j], z0[j]);
1244       const float *tx02 = get_texel_3d(samp, addr, x0[j], y1[j], z0[j]);
1245       const float *tx03 = get_texel_3d(samp, addr, x1[j], y1[j], z0[j]);
1246
1247       const float *tx10 = get_texel_3d(samp, addr, x0[j], y0[j], z1[j]);
1248       const float *tx11 = get_texel_3d(samp, addr, x1[j], y0[j], z1[j]);
1249       const float *tx12 = get_texel_3d(samp, addr, x0[j], y1[j], z1[j]);
1250       const float *tx13 = get_texel_3d(samp, addr, x1[j], y1[j], z1[j]);
1251
1252       /* interpolate R, G, B, A */
1253       for (c = 0; c < 4; c++) {
1254          rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j],
1255                               tx00[c], tx01[c],
1256                               tx02[c], tx03[c],
1257                               tx10[c], tx11[c],
1258                               tx12[c], tx13[c]);
1259       }
1260    }
1261 }
1262
1263
1264 static void
1265 mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
1266                   const float s[QUAD_SIZE],
1267                   const float t[QUAD_SIZE],
1268                   const float p[QUAD_SIZE],
1269                   float lodbias,
1270                   float rgba[NUM_CHANNELS][QUAD_SIZE])
1271 {
1272    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1273    const struct pipe_texture *texture = samp->texture;
1274    int level0;
1275    float lambda;
1276
1277    lambda = samp->compute_lambda(samp, s, t, p, lodbias);
1278    level0 = (int)lambda;
1279
1280    if (lambda < 0.0) {
1281       samp->level = 0;
1282       samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1283    }
1284    else if (level0 >= texture->last_level) {
1285       samp->level = texture->last_level;
1286       samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1287    }
1288    else {
1289       float levelBlend = lambda - level0;
1290       float rgba0[4][4];
1291       float rgba1[4][4];
1292       int c,j;
1293
1294       samp->level = level0;
1295       samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba0 );
1296
1297       samp->level = level0+1;
1298       samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba1 );
1299
1300       for (j = 0; j < QUAD_SIZE; j++) {
1301          for (c = 0; c < 4; c++) {
1302             rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
1303          }
1304       }
1305    }
1306 }
1307
1308
1309 static void
1310 mip_filter_nearest(struct tgsi_sampler *tgsi_sampler,
1311                    const float s[QUAD_SIZE],
1312                    const float t[QUAD_SIZE],
1313                    const float p[QUAD_SIZE],
1314                    float lodbias,
1315                    float rgba[NUM_CHANNELS][QUAD_SIZE])
1316 {
1317    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1318    const struct pipe_texture *texture = samp->texture;
1319    float lambda;
1320
1321    lambda = samp->compute_lambda(samp, s, t, p, lodbias);
1322
1323    if (lambda < 0.0) {
1324       samp->level = 0;
1325       samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1326    }
1327    else {
1328       samp->level = (int)(lambda + 0.5) ;
1329       samp->level = MIN2(samp->level, (int)texture->last_level);
1330       samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1331    }
1332
1333 #if 0
1334    printf("RGBA %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n",
1335           rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0],
1336           rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1],
1337           rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2],
1338           rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]);
1339 #endif
1340 }
1341
1342
1343 static void
1344 mip_filter_none(struct tgsi_sampler *tgsi_sampler,
1345                 const float s[QUAD_SIZE],
1346                 const float t[QUAD_SIZE],
1347                 const float p[QUAD_SIZE],
1348                 float lodbias,
1349                 float rgba[NUM_CHANNELS][QUAD_SIZE])
1350 {
1351    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1352    float lambda = samp->compute_lambda(samp, s, t, p, lodbias);
1353
1354    if (lambda < 0.0) {
1355       samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1356    }
1357    else {
1358       samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1359    }
1360 }
1361
1362
1363
1364 /**
1365  * Specialized version of mip_filter_linear with hard-wired calls to
1366  * 2d lambda calculation and 2d_linear_repeat_POT img filters.
1367  */
1368 static void
1369 mip_filter_linear_2d_linear_repeat_POT(
1370    struct tgsi_sampler *tgsi_sampler,
1371    const float s[QUAD_SIZE],
1372    const float t[QUAD_SIZE],
1373    const float p[QUAD_SIZE],
1374    float lodbias,
1375    float rgba[NUM_CHANNELS][QUAD_SIZE])
1376 {
1377    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1378    const struct pipe_texture *texture = samp->texture;
1379    int level0;
1380    float lambda;
1381
1382    lambda = compute_lambda_2d(samp, s, t, p, lodbias);
1383    level0 = (int)lambda;
1384
1385    /* Catches both negative and large values of level0:
1386     */
1387    if ((unsigned)level0 >= texture->last_level) {
1388       if (level0 < 0)
1389          samp->level = 0;
1390       else
1391          samp->level = texture->last_level;
1392
1393       img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba );
1394    }
1395    else {
1396       float levelBlend = lambda - level0;
1397       float rgba0[4][4];
1398       float rgba1[4][4];
1399       int c,j;
1400
1401       samp->level = level0;
1402       img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba0 );
1403
1404       samp->level = level0+1;
1405       img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba1 );
1406
1407       for (j = 0; j < QUAD_SIZE; j++) {
1408          for (c = 0; c < 4; c++) {
1409             rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
1410          }
1411       }
1412    }
1413 }
1414
1415
1416
1417 /**
1418  * Do shadow/depth comparisons.
1419  */
1420 static void
1421 sample_compare(struct tgsi_sampler *tgsi_sampler,
1422                const float s[QUAD_SIZE],
1423                const float t[QUAD_SIZE],
1424                const float p[QUAD_SIZE],
1425                float lodbias,
1426                float rgba[NUM_CHANNELS][QUAD_SIZE])
1427 {
1428    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1429    const struct pipe_sampler_state *sampler = samp->sampler;
1430    int j, k0, k1, k2, k3;
1431    float val;
1432
1433    samp->mip_filter( tgsi_sampler, s, t, p, lodbias, rgba );
1434
1435    /**
1436     * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
1437     * When we sampled the depth texture, the depth value was put into all
1438     * RGBA channels.  We look at the red channel here.
1439     */
1440
1441    /* compare four texcoords vs. four texture samples */
1442    switch (sampler->compare_func) {
1443    case PIPE_FUNC_LESS:
1444       k0 = p[0] < rgba[0][0];
1445       k1 = p[1] < rgba[0][1];
1446       k2 = p[2] < rgba[0][2];
1447       k3 = p[3] < rgba[0][3];
1448       break;
1449    case PIPE_FUNC_LEQUAL:
1450       k0 = p[0] <= rgba[0][0];
1451       k1 = p[1] <= rgba[0][1];
1452       k2 = p[2] <= rgba[0][2];
1453       k3 = p[3] <= rgba[0][3];
1454       break;
1455    case PIPE_FUNC_GREATER:
1456       k0 = p[0] > rgba[0][0];
1457       k1 = p[1] > rgba[0][1];
1458       k2 = p[2] > rgba[0][2];
1459       k3 = p[3] > rgba[0][3];
1460       break;
1461    case PIPE_FUNC_GEQUAL:
1462       k0 = p[0] >= rgba[0][0];
1463       k1 = p[1] >= rgba[0][1];
1464       k2 = p[2] >= rgba[0][2];
1465       k3 = p[3] >= rgba[0][3];
1466       break;
1467    case PIPE_FUNC_EQUAL:
1468       k0 = p[0] == rgba[0][0];
1469       k1 = p[1] == rgba[0][1];
1470       k2 = p[2] == rgba[0][2];
1471       k3 = p[3] == rgba[0][3];
1472       break;
1473    case PIPE_FUNC_NOTEQUAL:
1474       k0 = p[0] != rgba[0][0];
1475       k1 = p[1] != rgba[0][1];
1476       k2 = p[2] != rgba[0][2];
1477       k3 = p[3] != rgba[0][3];
1478       break;
1479    case PIPE_FUNC_ALWAYS:
1480       k0 = k1 = k2 = k3 = 1;
1481       break;
1482    case PIPE_FUNC_NEVER:
1483       k0 = k1 = k2 = k3 = 0;
1484       break;
1485    default:
1486       k0 = k1 = k2 = k3 = 0;
1487       assert(0);
1488       break;
1489    }
1490
1491    /* convert four pass/fail values to an intensity in [0,1] */
1492    val = 0.25F * (k0 + k1 + k2 + k3);
1493
1494    /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
1495    for (j = 0; j < 4; j++) {
1496       rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
1497       rgba[3][j] = 1.0F;
1498    }
1499 }
1500
1501
1502 /**
1503  * Compute which cube face is referenced by each texcoord and put that
1504  * info into the sampler faces[] array.  Then sample the cube faces
1505  */
1506 static void
1507 sample_cube(struct tgsi_sampler *tgsi_sampler,
1508             const float s[QUAD_SIZE],
1509             const float t[QUAD_SIZE],
1510             const float p[QUAD_SIZE],
1511             float lodbias,
1512             float rgba[NUM_CHANNELS][QUAD_SIZE])
1513 {
1514    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1515    unsigned j;
1516    float ssss[4], tttt[4];
1517
1518    /*
1519      major axis
1520      direction     target                             sc     tc    ma
1521      ----------    -------------------------------    ---    ---   ---
1522      +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
1523      -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
1524      +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
1525      -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
1526      +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
1527      -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
1528    */
1529    for (j = 0; j < QUAD_SIZE; j++) {
1530       float rx = s[j];
1531       float ry = t[j];
1532       float rz = p[j];
1533       const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
1534       unsigned face;
1535       float sc, tc, ma;
1536
1537       if (arx >= ary && arx >= arz) {
1538          if (rx >= 0.0F) {
1539             face = PIPE_TEX_FACE_POS_X;
1540             sc = -rz;
1541             tc = -ry;
1542             ma = arx;
1543          }
1544          else {
1545             face = PIPE_TEX_FACE_NEG_X;
1546             sc = rz;
1547             tc = -ry;
1548             ma = arx;
1549          }
1550       }
1551       else if (ary >= arx && ary >= arz) {
1552          if (ry >= 0.0F) {
1553             face = PIPE_TEX_FACE_POS_Y;
1554             sc = rx;
1555             tc = rz;
1556             ma = ary;
1557          }
1558          else {
1559             face = PIPE_TEX_FACE_NEG_Y;
1560             sc = rx;
1561             tc = -rz;
1562             ma = ary;
1563          }
1564       }
1565       else {
1566          if (rz > 0.0F) {
1567             face = PIPE_TEX_FACE_POS_Z;
1568             sc = rx;
1569             tc = -ry;
1570             ma = arz;
1571          }
1572          else {
1573             face = PIPE_TEX_FACE_NEG_Z;
1574             sc = -rx;
1575             tc = -ry;
1576             ma = arz;
1577          }
1578       }
1579
1580       {
1581          const float ima = 1.0 / ma;
1582          ssss[j] = ( sc * ima + 1.0F ) * 0.5F;
1583          tttt[j] = ( tc * ima + 1.0F ) * 0.5F;
1584          samp->faces[j] = face;
1585       }
1586    }
1587
1588    /* In our little pipeline, the compare stage is next.  If compare
1589     * is not active, this will point somewhere deeper into the
1590     * pipeline, eg. to mip_filter or even img_filter.
1591     */
1592    samp->compare(tgsi_sampler, ssss, tttt, NULL, lodbias, rgba);
1593 }
1594
1595
1596
1597 static wrap_nearest_func
1598 get_nearest_unorm_wrap(unsigned mode)
1599 {
1600    switch (mode) {
1601    case PIPE_TEX_WRAP_CLAMP:
1602       return wrap_nearest_unorm_clamp;
1603    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1604    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1605       return wrap_nearest_unorm_clamp_to_border;
1606    default:
1607       assert(0);
1608       return wrap_nearest_unorm_clamp;
1609    }
1610 }
1611
1612
1613 static wrap_nearest_func
1614 get_nearest_wrap(unsigned mode)
1615 {
1616    switch (mode) {
1617    case PIPE_TEX_WRAP_REPEAT:
1618       return wrap_nearest_repeat;
1619    case PIPE_TEX_WRAP_CLAMP:
1620       return wrap_nearest_clamp;
1621    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1622       return wrap_nearest_clamp_to_edge;
1623    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1624       return wrap_nearest_clamp_to_border;
1625    case PIPE_TEX_WRAP_MIRROR_REPEAT:
1626       return wrap_nearest_mirror_repeat;
1627    case PIPE_TEX_WRAP_MIRROR_CLAMP:
1628       return wrap_nearest_mirror_clamp;
1629    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1630       return wrap_nearest_mirror_clamp_to_edge;
1631    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1632       return wrap_nearest_mirror_clamp_to_border;
1633    default:
1634       assert(0);
1635       return wrap_nearest_repeat;
1636    }
1637 }
1638
1639
1640 static wrap_linear_func
1641 get_linear_unorm_wrap(unsigned mode)
1642 {
1643    switch (mode) {
1644    case PIPE_TEX_WRAP_CLAMP:
1645       return wrap_linear_unorm_clamp;
1646    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1647    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1648       return wrap_linear_unorm_clamp_to_border;
1649    default:
1650       assert(0);
1651       return wrap_linear_unorm_clamp;
1652    }
1653 }
1654
1655
1656 static wrap_linear_func
1657 get_linear_wrap(unsigned mode)
1658 {
1659    switch (mode) {
1660    case PIPE_TEX_WRAP_REPEAT:
1661       return wrap_linear_repeat;
1662    case PIPE_TEX_WRAP_CLAMP:
1663       return wrap_linear_clamp;
1664    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1665       return wrap_linear_clamp_to_edge;
1666    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1667       return wrap_linear_clamp_to_border;
1668    case PIPE_TEX_WRAP_MIRROR_REPEAT:
1669       return wrap_linear_mirror_repeat;
1670    case PIPE_TEX_WRAP_MIRROR_CLAMP:
1671       return wrap_linear_mirror_clamp;
1672    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1673       return wrap_linear_mirror_clamp_to_edge;
1674    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1675       return wrap_linear_mirror_clamp_to_border;
1676    default:
1677       assert(0);
1678       return wrap_linear_repeat;
1679    }
1680 }
1681
1682
1683 static compute_lambda_func
1684 get_lambda_func(const union sp_sampler_key key)
1685 {
1686    if (key.bits.processor == TGSI_PROCESSOR_VERTEX)
1687       return compute_lambda_vert;
1688
1689    switch (key.bits.target) {
1690    case PIPE_TEXTURE_1D:
1691       return compute_lambda_1d;
1692    case PIPE_TEXTURE_2D:
1693    case PIPE_TEXTURE_CUBE:
1694       return compute_lambda_2d;
1695    case PIPE_TEXTURE_3D:
1696       return compute_lambda_3d;
1697    default:
1698       assert(0);
1699       return compute_lambda_1d;
1700    }
1701 }
1702
1703
1704 static filter_func
1705 get_img_filter(const union sp_sampler_key key,
1706                unsigned filter,
1707                const struct pipe_sampler_state *sampler)
1708 {
1709    switch (key.bits.target) {
1710    case PIPE_TEXTURE_1D:
1711       if (filter == PIPE_TEX_FILTER_NEAREST)
1712          return img_filter_1d_nearest;
1713       else
1714          return img_filter_1d_linear;
1715       break;
1716    case PIPE_TEXTURE_2D:
1717       /* Try for fast path:
1718        */
1719       if (key.bits.is_pot &&
1720           sampler->wrap_s == sampler->wrap_t &&
1721           sampler->normalized_coords)
1722       {
1723          switch (sampler->wrap_s) {
1724          case PIPE_TEX_WRAP_REPEAT:
1725             switch (filter) {
1726             case PIPE_TEX_FILTER_NEAREST:
1727                return img_filter_2d_nearest_repeat_POT;
1728             case PIPE_TEX_FILTER_LINEAR:
1729                return img_filter_2d_linear_repeat_POT;
1730             default:
1731                break;
1732             }
1733             break;
1734          case PIPE_TEX_WRAP_CLAMP:
1735             switch (filter) {
1736             case PIPE_TEX_FILTER_NEAREST:
1737                return img_filter_2d_nearest_clamp_POT;
1738             default:
1739                break;
1740             }
1741          }
1742       }
1743       /* Otherwise use default versions:
1744        */
1745       if (filter == PIPE_TEX_FILTER_NEAREST)
1746          return img_filter_2d_nearest;
1747       else
1748          return img_filter_2d_linear;
1749       break;
1750    case PIPE_TEXTURE_CUBE:
1751       if (filter == PIPE_TEX_FILTER_NEAREST)
1752          return img_filter_cube_nearest;
1753       else
1754          return img_filter_cube_linear;
1755       break;
1756    case PIPE_TEXTURE_3D:
1757       if (filter == PIPE_TEX_FILTER_NEAREST)
1758          return img_filter_3d_nearest;
1759       else
1760          return img_filter_3d_linear;
1761       break;
1762    default:
1763       assert(0);
1764       return img_filter_1d_nearest;
1765    }
1766 }
1767
1768
1769 /**
1770  * Bind the given texture object and texture cache to the sampler varient.
1771  */
1772 void
1773 sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp,
1774                                  struct softpipe_tex_tile_cache *tex_cache,
1775                                  const struct pipe_texture *texture )
1776 {
1777    const struct pipe_sampler_state *sampler = samp->sampler;
1778
1779    samp->texture = texture;
1780    samp->cache = tex_cache;
1781    samp->xpot = util_unsigned_logbase2( texture->width[0] );
1782    samp->ypot = util_unsigned_logbase2( texture->height[0] );
1783    samp->level = CLAMP((int) sampler->min_lod, 0, (int) texture->last_level);
1784 }
1785
1786
/**
 * Free a sampler varient.  Only the varient structure itself is
 * released here.
 */
void
sp_sampler_varient_destroy( struct sp_sampler_varient *samp )
{
   FREE( samp );
}
1792
1793
1794 /**
1795  * Create a sampler varient for a given set of non-orthogonal state.
1796  */
1797 struct sp_sampler_varient *
1798 sp_create_sampler_varient( const struct pipe_sampler_state *sampler,
1799                            const union sp_sampler_key key )
1800 {
1801    struct sp_sampler_varient *samp = CALLOC_STRUCT(sp_sampler_varient);
1802    if (!samp)
1803       return NULL;
1804
1805    samp->sampler = sampler;
1806    samp->key = key;
1807
1808    /* Note that (for instance) linear_texcoord_s and
1809     * nearest_texcoord_s may be active at the same time, if the
1810     * sampler min_img_filter differs from its mag_img_filter.
1811     */
1812    if (sampler->normalized_coords) {
1813       samp->linear_texcoord_s = get_linear_wrap( sampler->wrap_s );
1814       samp->linear_texcoord_t = get_linear_wrap( sampler->wrap_t );
1815       samp->linear_texcoord_p = get_linear_wrap( sampler->wrap_r );
1816
1817       samp->nearest_texcoord_s = get_nearest_wrap( sampler->wrap_s );
1818       samp->nearest_texcoord_t = get_nearest_wrap( sampler->wrap_t );
1819       samp->nearest_texcoord_p = get_nearest_wrap( sampler->wrap_r );
1820    }
1821    else {
1822       samp->linear_texcoord_s = get_linear_unorm_wrap( sampler->wrap_s );
1823       samp->linear_texcoord_t = get_linear_unorm_wrap( sampler->wrap_t );
1824       samp->linear_texcoord_p = get_linear_unorm_wrap( sampler->wrap_r );
1825
1826       samp->nearest_texcoord_s = get_nearest_unorm_wrap( sampler->wrap_s );
1827       samp->nearest_texcoord_t = get_nearest_unorm_wrap( sampler->wrap_t );
1828       samp->nearest_texcoord_p = get_nearest_unorm_wrap( sampler->wrap_r );
1829    }
1830
1831    samp->compute_lambda = get_lambda_func( key );
1832
1833    samp->min_img_filter = get_img_filter(key, sampler->min_img_filter, sampler);
1834    samp->mag_img_filter = get_img_filter(key, sampler->mag_img_filter, sampler);
1835
1836    switch (sampler->min_mip_filter) {
1837    case PIPE_TEX_MIPFILTER_NONE:
1838       if (sampler->min_img_filter == sampler->mag_img_filter)
1839          samp->mip_filter = samp->min_img_filter;
1840       else
1841          samp->mip_filter = mip_filter_none;
1842       break;
1843
1844    case PIPE_TEX_MIPFILTER_NEAREST:
1845       samp->mip_filter = mip_filter_nearest;
1846       break;
1847
1848    case PIPE_TEX_MIPFILTER_LINEAR:
1849       if (key.bits.is_pot &&
1850           sampler->min_img_filter == sampler->mag_img_filter &&
1851           sampler->normalized_coords &&
1852           sampler->wrap_s == PIPE_TEX_WRAP_REPEAT &&
1853           sampler->wrap_t == PIPE_TEX_WRAP_REPEAT &&
1854           sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR)
1855       {
1856          samp->mip_filter = mip_filter_linear_2d_linear_repeat_POT;
1857       }
1858       else
1859       {
1860          samp->mip_filter = mip_filter_linear;
1861       }
1862       break;
1863    }
1864
1865    if (sampler->compare_mode != FALSE) {
1866       samp->compare = sample_compare;
1867    }
1868    else {
1869       /* Skip compare operation by promoting the mip_filter function
1870        * pointer:
1871        */
1872       samp->compare = samp->mip_filter;
1873    }
1874
1875    if (key.bits.target == PIPE_TEXTURE_CUBE) {
1876       samp->base.get_samples = sample_cube;
1877    }
1878    else {
1879       samp->faces[0] = 0;
1880       samp->faces[1] = 0;
1881       samp->faces[2] = 0;
1882       samp->faces[3] = 0;
1883
1884       /* Skip cube face determination by promoting the compare
1885        * function pointer:
1886        */
1887       samp->base.get_samples = samp->compare;
1888    }
1889
1890    return samp;
1891 }