src/gallium/drivers/softpipe/sp_tex_sample.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  * Copyright 2008 VMware, Inc.  All rights reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the
   9  * "Software"), to deal in the Software without restriction, including
  10  * without limitation the rights to use, copy, modify, merge, publish,
  11  * distribute, sub license, and/or sell copies of the Software, and to
  12  * permit persons to whom the Software is furnished to do so, subject to
  13  * the following conditions:
  14  *
  15  * The above copyright notice and this permission notice (including the
  16  * next paragraph) shall be included in all copies or substantial portions
  17  * of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26  *
  27  **************************************************************************/
  28
  29 /**
  30  * Texture sampling
  31  *
  32  * Authors:
  33  *   Brian Paul
  34  *   Keith Whitwell
  35  */
  36
  37 #include "pipe/p_context.h"
  38 #include "pipe/p_defines.h"
  39 #include "pipe/p_shader_tokens.h"
  40 #include "util/u_math.h"
  41 #include "util/u_memory.h"
  42 #include "sp_quad.h"   /* only for #define QUAD_* tokens */
  43 #include "sp_tex_sample.h"
  44 #include "sp_tex_tile_cache.h"
  45
  46
  47
/*
 * Note, the FRAC macro has to work perfectly.  Otherwise you'll sometimes
 * see 1-pixel bands of improperly weighted linear-filtered textures.
 * The tests/texwrap.c demo is a good test.
 * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0.
 * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x).
 * The argument appears twice in the expansion, so only pass expressions
 * without side effects.
 */
#define FRAC(f)  ((f) - util_ifloor(f))
  56
  57
  58 /**
  59  * Linear interpolation macro
  60  */
  61 static INLINE float
  62 lerp(float a, float v0, float v1)
  63 {
  64    return v0 + a * (v1 - v0);
  65 }
  66
  67
  68 /**
  69  * Do 2D/biliner interpolation of float values.
  70  * v00, v10, v01 and v11 are typically four texture samples in a square/box.
  71  * a and b are the horizontal and vertical interpolants.
  72  * It's important that this function is inlined when compiled with
  73  * optimization!  If we find that's not true on some systems, convert
  74  * to a macro.
  75  */
  76 static INLINE float
  77 lerp_2d(float a, float b,
  78         float v00, float v10, float v01, float v11)
  79 {
  80    const float temp0 = lerp(a, v00, v10);
  81    const float temp1 = lerp(a, v01, v11);
  82    return lerp(b, temp0, temp1);
  83 }
  84
  85
  86 /**
  87  * As above, but 3D interpolation of 8 values.
  88  */
  89 static INLINE float
  90 lerp_3d(float a, float b, float c,
  91         float v000, float v100, float v010, float v110,
  92         float v001, float v101, float v011, float v111)
  93 {
  94    const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
  95    const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
  96    return lerp(c, temp0, temp1);
  97 }
  98
  99
 100
 101 /**
 102  * If A is a signed integer, A % B doesn't give the right value for A < 0
 103  * (in terms of texture repeat).  Just casting to unsigned fixes that.
 104  */
 105 #define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B))
 106
 107
 108 /**
 109  * Apply texture coord wrapping mode and return integer texture indexes
 110  * for a vector of four texcoords (S or T or P).
 111  * \param wrapMode  PIPE_TEX_WRAP_x
 112  * \param s  the incoming texcoords
 113  * \param size  the texture image size
 114  * \param icoord  returns the integer texcoords
 115  * \return  integer texture index
 116  */
 117 static void
 118 wrap_nearest_repeat(const float s[4], unsigned size,
 119                         int icoord[4])
 120 {
 121    uint ch;
 122
 123    /* s limited to [0,1) */
 124    /* i limited to [0,size-1] */
 125    for (ch = 0; ch < 4; ch++) {
 126       int i = util_ifloor(s[ch] * size);
 127       icoord[ch] = REMAINDER(i, size);
 128    }
 129 }
 130
 131
 132 static void
 133 wrap_nearest_clamp(const float s[4], unsigned size,
 134                    int icoord[4])
 135 {
 136    uint ch;
 137    /* s limited to [0,1] */
 138    /* i limited to [0,size-1] */
 139    for (ch = 0; ch < 4; ch++) {
 140       if (s[ch] <= 0.0F)
 141          icoord[ch] = 0;
 142       else if (s[ch] >= 1.0F)
 143          icoord[ch] = size - 1;
 144       else
 145          icoord[ch] = util_ifloor(s[ch] * size);
 146    }
 147 }
 148
 149
 150 static void
 151 wrap_nearest_clamp_to_edge(const float s[4], unsigned size,
 152                            int icoord[4])
 153 {
 154    uint ch;
 155    /* s limited to [min,max] */
 156    /* i limited to [0, size-1] */
 157    const float min = 1.0F / (2.0F * size);
 158    const float max = 1.0F - min;
 159    for (ch = 0; ch < 4; ch++) {
 160       if (s[ch] < min)
 161          icoord[ch] = 0;
 162       else if (s[ch] > max)
 163          icoord[ch] = size - 1;
 164       else
 165          icoord[ch] = util_ifloor(s[ch] * size);
 166    }
 167 }
 168
 169
 170 static void
 171 wrap_nearest_clamp_to_border(const float s[4], unsigned size,
 172                              int icoord[4])
 173 {
 174    uint ch;
 175    /* s limited to [min,max] */
 176    /* i limited to [-1, size] */
 177    const float min = -1.0F / (2.0F * size);
 178    const float max = 1.0F - min;
 179    for (ch = 0; ch < 4; ch++) {
 180       if (s[ch] <= min)
 181          icoord[ch] = -1;
 182       else if (s[ch] >= max)
 183          icoord[ch] = size;
 184       else
 185          icoord[ch] = util_ifloor(s[ch] * size);
 186    }
 187 }
 188
 189 static void
 190 wrap_nearest_mirror_repeat(const float s[4], unsigned size,
 191                            int icoord[4])
 192 {
 193    uint ch;
 194    const float min = 1.0F / (2.0F * size);
 195    const float max = 1.0F - min;
 196    for (ch = 0; ch < 4; ch++) {
 197       const int flr = util_ifloor(s[ch]);
 198       float u;
 199       if (flr & 1)
 200          u = 1.0F - (s[ch] - (float) flr);
 201       else
 202          u = s[ch] - (float) flr;
 203       if (u < min)
 204          icoord[ch] = 0;
 205       else if (u > max)
 206          icoord[ch] = size - 1;
 207       else
 208          icoord[ch] = util_ifloor(u * size);
 209    }
 210 }
 211
 212 static void
 213 wrap_nearest_mirror_clamp(const float s[4], unsigned size,
 214                           int icoord[4])
 215 {
 216    uint ch;
 217    for (ch = 0; ch < 4; ch++) {
 218       /* s limited to [0,1] */
 219       /* i limited to [0,size-1] */
 220       const float u = fabsf(s[ch]);
 221       if (u <= 0.0F)
 222          icoord[ch] = 0;
 223       else if (u >= 1.0F)
 224          icoord[ch] = size - 1;
 225       else
 226          icoord[ch] = util_ifloor(u * size);
 227    }
 228 }
 229
 230 static void
 231 wrap_nearest_mirror_clamp_to_edge(const float s[4], unsigned size,
 232                            int icoord[4])
 233 {
 234    uint ch;
 235    /* s limited to [min,max] */
 236    /* i limited to [0, size-1] */
 237    const float min = 1.0F / (2.0F * size);
 238    const float max = 1.0F - min;
 239    for (ch = 0; ch < 4; ch++) {
 240       const float u = fabsf(s[ch]);
 241       if (u < min)
 242          icoord[ch] = 0;
 243       else if (u > max)
 244          icoord[ch] = size - 1;
 245       else
 246          icoord[ch] = util_ifloor(u * size);
 247    }
 248 }
 249
 250
 251 static void
 252 wrap_nearest_mirror_clamp_to_border(const float s[4], unsigned size,
 253                                     int icoord[4])
 254 {
 255    uint ch;
 256    /* s limited to [min,max] */
 257    /* i limited to [0, size-1] */
 258    const float min = -1.0F / (2.0F * size);
 259    const float max = 1.0F - min;
 260    for (ch = 0; ch < 4; ch++) {
 261       const float u = fabsf(s[ch]);
 262       if (u < min)
 263          icoord[ch] = -1;
 264       else if (u > max)
 265          icoord[ch] = size;
 266       else
 267          icoord[ch] = util_ifloor(u * size);
 268    }
 269 }
 270
 271
 272 /**
 273  * Used to compute texel locations for linear sampling for four texcoords.
 274  * \param wrapMode  PIPE_TEX_WRAP_x
 275  * \param s  the texcoords
 276  * \param size  the texture image size
 277  * \param icoord0  returns first texture indexes
 278  * \param icoord1  returns second texture indexes (usually icoord0 + 1)
 279  * \param w  returns blend factor/weight between texture indexes
 280  * \param icoord  returns the computed integer texture coords
 281  */
 282 static void
 283 wrap_linear_repeat(const float s[4], unsigned size,
 284                    int icoord0[4], int icoord1[4], float w[4])
 285 {
 286    uint ch;
 287
 288    for (ch = 0; ch < 4; ch++) {
 289       float u = s[ch] * size - 0.5F;
 290       icoord0[ch] = REMAINDER(util_ifloor(u), size);
 291       icoord1[ch] = REMAINDER(icoord0[ch] + 1, size);
 292       w[ch] = FRAC(u);
 293    }
 294 }
 295
 296 static void
 297 wrap_linear_clamp(const float s[4], unsigned size,
 298                   int icoord0[4], int icoord1[4], float w[4])
 299 {
 300    uint ch;
 301    for (ch = 0; ch < 4; ch++) {
 302       float u = CLAMP(s[ch], 0.0F, 1.0F);
 303       u = u * size - 0.5f;
 304       icoord0[ch] = util_ifloor(u);
 305       icoord1[ch] = icoord0[ch] + 1;
 306       w[ch] = FRAC(u);
 307    }
 308 }
 309
 310 static void
 311 wrap_linear_clamp_to_edge(const float s[4], unsigned size,
 312                           int icoord0[4], int icoord1[4], float w[4])
 313 {
 314    uint ch;
 315    for (ch = 0; ch < 4; ch++) {
 316       float u = CLAMP(s[ch], 0.0F, 1.0F);
 317       u = u * size - 0.5f;
 318       icoord0[ch] = util_ifloor(u);
 319       icoord1[ch] = icoord0[ch] + 1;
 320       if (icoord0[ch] < 0)
 321          icoord0[ch] = 0;
 322       if (icoord1[ch] >= (int) size)
 323          icoord1[ch] = size - 1;
 324       w[ch] = FRAC(u);
 325    }
 326 }
 327
 328 static void
 329 wrap_linear_clamp_to_border(const float s[4], unsigned size,
 330                             int icoord0[4], int icoord1[4], float w[4])
 331 {
 332    const float min = -1.0F / (2.0F * size);
 333    const float max = 1.0F - min;
 334    uint ch;
 335    for (ch = 0; ch < 4; ch++) {
 336       float u = CLAMP(s[ch], min, max);
 337       u = u * size - 0.5f;
 338       icoord0[ch] = util_ifloor(u);
 339       icoord1[ch] = icoord0[ch] + 1;
 340       w[ch] = FRAC(u);
 341    }
 342 }
 343
 344
 345 static void
 346 wrap_linear_mirror_repeat(const float s[4], unsigned size,
 347                           int icoord0[4], int icoord1[4], float w[4])
 348 {
 349    uint ch;
 350    for (ch = 0; ch < 4; ch++) {
 351       const int flr = util_ifloor(s[ch]);
 352       float u;
 353       if (flr & 1)
 354          u = 1.0F - (s[ch] - (float) flr);
 355       else
 356          u = s[ch] - (float) flr;
 357       u = u * size - 0.5F;
 358       icoord0[ch] = util_ifloor(u);
 359       icoord1[ch] = icoord0[ch] + 1;
 360       if (icoord0[ch] < 0)
 361          icoord0[ch] = 0;
 362       if (icoord1[ch] >= (int) size)
 363          icoord1[ch] = size - 1;
 364       w[ch] = FRAC(u);
 365    }
 366 }
 367
 368 static void
 369 wrap_linear_mirror_clamp(const float s[4], unsigned size,
 370                          int icoord0[4], int icoord1[4], float w[4])
 371 {
 372    uint ch;
 373    for (ch = 0; ch < 4; ch++) {
 374       float u = fabsf(s[ch]);
 375       if (u >= 1.0F)
 376          u = (float) size;
 377       else
 378          u *= size;
 379       u -= 0.5F;
 380       icoord0[ch] = util_ifloor(u);
 381       icoord1[ch] = icoord0[ch] + 1;
 382       w[ch] = FRAC(u);
 383    }
 384 }
 385
 386 static void
 387 wrap_linear_mirror_clamp_to_edge(const float s[4], unsigned size,
 388                                  int icoord0[4], int icoord1[4], float w[4])
 389 {
 390    uint ch;
 391    for (ch = 0; ch < 4; ch++) {
 392       float u = fabsf(s[ch]);
 393       if (u >= 1.0F)
 394          u = (float) size;
 395       else
 396          u *= size;
 397       u -= 0.5F;
 398       icoord0[ch] = util_ifloor(u);
 399       icoord1[ch] = icoord0[ch] + 1;
 400       if (icoord0[ch] < 0)
 401          icoord0[ch] = 0;
 402       if (icoord1[ch] >= (int) size)
 403          icoord1[ch] = size - 1;
 404       w[ch] = FRAC(u);
 405    }
 406 }
 407
 408 static void
 409 wrap_linear_mirror_clamp_to_border(const float s[4], unsigned size,
 410                                    int icoord0[4], int icoord1[4], float w[4])
 411 {
 412    const float min = -1.0F / (2.0F * size);
 413    const float max = 1.0F - min;
 414    uint ch;
 415    for (ch = 0; ch < 4; ch++) {
 416       float u = fabsf(s[ch]);
 417       if (u <= min)
 418          u = min * size;
 419       else if (u >= max)
 420          u = max * size;
 421       else
 422          u *= size;
 423       u -= 0.5F;
 424       icoord0[ch] = util_ifloor(u);
 425       icoord1[ch] = icoord0[ch] + 1;
 426       w[ch] = FRAC(u);
 427    }
 428 }
 429
 430
 431 /**
 432  * For RECT textures / unnormalized texcoords
 433  * Only a subset of wrap modes supported.
 434  */
 435 static void
 436 wrap_nearest_unorm_clamp(const float s[4], unsigned size,
 437                           int icoord[4])
 438 {
 439    uint ch;
 440    for (ch = 0; ch < 4; ch++) {
 441       int i = util_ifloor(s[ch]);
 442       icoord[ch]= CLAMP(i, 0, (int) size-1);
 443    }
 444 }
 445
 446 /* Handles clamp_to_edge and clamp_to_border:
 447  */
 448 static void
 449 wrap_nearest_unorm_clamp_to_border(const float s[4], unsigned size,
 450                                int icoord[4])
 451 {
 452    uint ch;
 453    for (ch = 0; ch < 4; ch++) {
 454       icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) );
 455    }
 456 }
 457
 458
 459 /**
 460  * For RECT textures / unnormalized texcoords.
 461  * Only a subset of wrap modes supported.
 462  */
 463 static void
 464 wrap_linear_unorm_clamp(const float s[4], unsigned size,
 465                          int icoord0[4], int icoord1[4], float w[4])
 466 {
 467    uint ch;
 468    for (ch = 0; ch < 4; ch++) {
 469       /* Not exactly what the spec says, but it matches NVIDIA output */
 470       float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f);
 471       icoord0[ch] = util_ifloor(u);
 472       icoord1[ch] = icoord0[ch] + 1;
 473       w[ch] = FRAC(u);
 474    }
 475 }
 476
 477 static void
 478 wrap_linear_unorm_clamp_to_border( const float s[4], unsigned size,
 479                                    int icoord0[4], int icoord1[4], float w[4])
 480 {
 481    uint ch;
 482    for (ch = 0; ch < 4; ch++) {
 483       float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F);
 484       u -= 0.5F;
 485       icoord0[ch] = util_ifloor(u);
 486       icoord1[ch] = icoord0[ch] + 1;
 487       if (icoord1[ch] > (int) size - 1)
 488          icoord1[ch] = size - 1;
 489       w[ch] = FRAC(u);
 490    }
 491 }
 492
 493
 494
 495
 496
 497 /**
 498  * Examine the quad's texture coordinates to compute the partial
 499  * derivatives w.r.t X and Y, then compute lambda (level of detail).
 500  */
 501 static float
 502 compute_lambda_1d(const struct sp_sampler_varient *samp,
 503                   const float s[QUAD_SIZE],
 504                   const float t[QUAD_SIZE],
 505                   const float p[QUAD_SIZE],
 506                   float lodbias)
 507 {
 508    const struct pipe_texture *texture = samp->texture;
 509    const struct pipe_sampler_state *sampler = samp->sampler;
 510    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 511    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 512    float rho = MAX2(dsdx, dsdy) * texture->width[0];
 513    float lambda;
 514
 515    lambda = util_fast_log2(rho);
 516    lambda += lodbias + sampler->lod_bias;
 517    lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
 518
 519    return lambda;
 520 }
 521
 522 static float
 523 compute_lambda_2d(const struct sp_sampler_varient *samp,
 524                   const float s[QUAD_SIZE],
 525                   const float t[QUAD_SIZE],
 526                   const float p[QUAD_SIZE],
 527                   float lodbias)
 528 {
 529    const struct pipe_texture *texture = samp->texture;
 530    const struct pipe_sampler_state *sampler = samp->sampler;
 531    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 532    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 533    float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
 534    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
 535    float maxx = MAX2(dsdx, dsdy) * texture->width[0];
 536    float maxy = MAX2(dtdx, dtdy) * texture->height[0];
 537    float rho  = MAX2(maxx, maxy);
 538    float lambda;
 539
 540    lambda = util_fast_log2(rho);
 541    lambda += lodbias + sampler->lod_bias;
 542    lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
 543
 544    return lambda;
 545 }
 546
 547
 548 static float
 549 compute_lambda_3d(const struct sp_sampler_varient *samp,
 550                   const float s[QUAD_SIZE],
 551                   const float t[QUAD_SIZE],
 552                   const float p[QUAD_SIZE],
 553                   float lodbias)
 554 {
 555    const struct pipe_texture *texture = samp->texture;
 556    const struct pipe_sampler_state *sampler = samp->sampler;
 557    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 558    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 559    float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
 560    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
 561    float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
 562    float dpdy = fabsf(p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT]);
 563    float maxx = MAX2(dsdx, dsdy) * texture->width[0];
 564    float maxy = MAX2(dtdx, dtdy) * texture->height[0];
 565    float maxz = MAX2(dpdx, dpdy) * texture->depth[0];
 566    float rho, lambda;
 567
 568    rho = MAX2(maxx, maxy);
 569    rho = MAX2(rho, maxz);
 570
 571    lambda = util_fast_log2(rho);
 572    lambda += lodbias + sampler->lod_bias;
 573    lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
 574
 575    return lambda;
 576 }
 577
 578
 579
 580 static float
 581 compute_lambda_vert(const struct sp_sampler_varient *samp,
 582                     const float s[QUAD_SIZE],
 583                     const float t[QUAD_SIZE],
 584                     const float p[QUAD_SIZE],
 585                     float lodbias)
 586 {
 587    return lodbias;
 588 }
 589
 590
 591
 592 /**
 593  * Get a texel from a texture, using the texture tile cache.
 594  *
 595  * \param addr  the template tex address containing cube, z, face info.
 596  * \param x  the x coord of texel within 2D image
 597  * \param y  the y coord of texel within 2D image
 598  * \param rgba  the quad to put the texel/color into
 599  *
 600  * XXX maybe move this into sp_tex_tile_cache.c and merge with the
 601  * sp_get_cached_tile_tex() function.  Also, get 4 texels instead of 1...
 602  */
 603
 604
 605
 606
 607 static INLINE const float *
 608 get_texel_2d_no_border(const struct sp_sampler_varient *samp,
 609                        union tex_tile_address addr, int x, int y)
 610 {
 611    const struct softpipe_tex_cached_tile *tile;
 612
 613    addr.bits.x = x / TILE_SIZE;
 614    addr.bits.y = y / TILE_SIZE;
 615    y %= TILE_SIZE;
 616    x %= TILE_SIZE;
 617
 618    tile = sp_get_cached_tile_tex(samp->cache, addr);
 619
 620    return &tile->data.color[y][x][0];
 621 }
 622
 623
 624 static INLINE const float *
 625 get_texel_2d(const struct sp_sampler_varient *samp,
 626              union tex_tile_address addr, int x, int y)
 627 {
 628    const struct pipe_texture *texture = samp->texture;
 629    unsigned level = addr.bits.level;
 630
 631    if (x < 0 || x >= (int) texture->width[level] ||
 632        y < 0 || y >= (int) texture->height[level]) {
 633       return samp->sampler->border_color;
 634    }
 635    else {
 636       return get_texel_2d_no_border( samp, addr, x, y );
 637    }
 638 }
 639
 640
 641 /* Gather a quad of adjacent texels within a tile:
 642  */
 643 static INLINE void
 644 get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_varient *samp,
 645                                         union tex_tile_address addr,
 646                                         unsigned x, unsigned y,
 647                                         const float *out[4])
 648 {
 649    const struct softpipe_tex_cached_tile *tile;
 650
 651    addr.bits.x = x / TILE_SIZE;
 652    addr.bits.y = y / TILE_SIZE;
 653    y %= TILE_SIZE;
 654    x %= TILE_SIZE;
 655
 656    tile = sp_get_cached_tile_tex(samp->cache, addr);
 657
 658    out[0] = &tile->data.color[y  ][x  ][0];
 659    out[1] = &tile->data.color[y  ][x+1][0];
 660    out[2] = &tile->data.color[y+1][x  ][0];
 661    out[3] = &tile->data.color[y+1][x+1][0];
 662 }
 663
 664
 665 /* Gather a quad of potentially non-adjacent texels:
 666  */
 667 static INLINE void
 668 get_texel_quad_2d_no_border(const struct sp_sampler_varient *samp,
 669                             union tex_tile_address addr,
 670                             int x0, int y0,
 671                             int x1, int y1,
 672                             const float *out[4])
 673 {
 674    out[0] = get_texel_2d_no_border( samp, addr, x0, y0 );
 675    out[1] = get_texel_2d_no_border( samp, addr, x1, y0 );
 676    out[2] = get_texel_2d_no_border( samp, addr, x0, y1 );
 677    out[3] = get_texel_2d_no_border( samp, addr, x1, y1 );
 678 }
 679
 680 /* Can involve a lot of unnecessary checks for border color:
 681  */
 682 static INLINE void
 683 get_texel_quad_2d(const struct sp_sampler_varient *samp,
 684                   union tex_tile_address addr,
 685                   int x0, int y0,
 686                   int x1, int y1,
 687                   const float *out[4])
 688 {
 689    out[0] = get_texel_2d( samp, addr, x0, y0 );
 690    out[1] = get_texel_2d( samp, addr, x1, y0 );
 691    out[3] = get_texel_2d( samp, addr, x1, y1 );
 692    out[2] = get_texel_2d( samp, addr, x0, y1 );
 693 }
 694
 695
 696
 697 /* 3d varients:
 698  */
 699 static INLINE const float *
 700 get_texel_3d_no_border(const struct sp_sampler_varient *samp,
 701                     union tex_tile_address addr, int x, int y, int z)
 702 {
 703    const struct softpipe_tex_cached_tile *tile;
 704
 705    addr.bits.x = x / TILE_SIZE;
 706    addr.bits.y = y / TILE_SIZE;
 707    addr.bits.z = z;
 708    y %= TILE_SIZE;
 709    x %= TILE_SIZE;
 710
 711    tile = sp_get_cached_tile_tex(samp->cache, addr);
 712
 713    return &tile->data.color[y][x][0];
 714 }
 715
 716
 717 static INLINE const float *
 718 get_texel_3d(const struct sp_sampler_varient *samp,
 719              union tex_tile_address addr, int x, int y, int z )
 720 {
 721    const struct pipe_texture *texture = samp->texture;
 722    unsigned level = addr.bits.level;
 723
 724    if (x < 0 || x >= (int) texture->width[level] ||
 725        y < 0 || y >= (int) texture->height[level] ||
 726        z < 0 || z >= (int) texture->depth[level]) {
 727       return samp->sampler->border_color;
 728    }
 729    else {
 730       return get_texel_3d_no_border( samp, addr, x, y, z );
 731    }
 732 }
 733
 734
 735 /**
 736  * Given the logbase2 of a mipmap's base level size and a mipmap level,
 737  * return the size (in texels) of that mipmap level.
 738  * For example, if level[0].width = 256 then base_pot will be 8.
 739  * If level = 2, then we'll return 64 (the width at level=2).
 740  * Return 1 if level > base_pot.
 741  */
 742 static INLINE unsigned
 743 pot_level_size(unsigned base_pot, unsigned level)
 744 {
 745    return (base_pot >= level) ? (1 << (base_pot - level)) : 1;
 746 }
 747
 748
 749 /* Some image-filter fastpaths:
 750  */
 751 static INLINE void
 752 img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
 753                                   const float s[QUAD_SIZE],
 754                                   const float t[QUAD_SIZE],
 755                                   const float p[QUAD_SIZE],
 756                                   float lodbias,
 757                                   float rgba[NUM_CHANNELS][QUAD_SIZE])
 758 {
 759    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 760    unsigned  j;
 761    unsigned level = samp->level;
 762    unsigned xpot = pot_level_size(samp->xpot, level);
 763    unsigned ypot = pot_level_size(samp->ypot, level);
 764    unsigned xmax = (xpot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, xpot) - 1; */
 765    unsigned ymax = (ypot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, ypot) - 1; */
 766    union tex_tile_address addr;
 767
 768    addr.value = 0;
 769    addr.bits.level = samp->level;
 770
 771
 772    for (j = 0; j < QUAD_SIZE; j++) {
 773       int c;
 774
 775       float u = s[j] * xpot - 0.5F;
 776       float v = t[j] * ypot - 0.5F;
 777
 778       int uflr = util_ifloor(u);
 779       int vflr = util_ifloor(v);
 780
 781       float xw = u - (float)uflr;
 782       float yw = v - (float)vflr;
 783
 784       int x0 = uflr & (xpot - 1);
 785       int y0 = vflr & (ypot - 1);
 786
 787       const float *tx[4];
 788
 789       /* Can we fetch all four at once:
 790        */
 791       if (x0 < xmax && y0 < ymax)
 792       {
 793          get_texel_quad_2d_no_border_single_tile(samp, addr, x0, y0, tx);
 794       }
 795       else
 796       {
 797          unsigned x1 = (x0 + 1) & (xpot - 1);
 798          unsigned y1 = (y0 + 1) & (ypot - 1);
 799          get_texel_quad_2d_no_border(samp, addr, x0, y0, x1, y1, tx);
 800       }
 801
 802
 803       /* interpolate R, G, B, A */
 804       for (c = 0; c < 4; c++) {
 805          rgba[c][j] = lerp_2d(xw, yw,
 806                               tx[0][c], tx[1][c],
 807                               tx[2][c], tx[3][c]);
 808       }
 809    }
 810 }
 811
 812
 813 static INLINE void
 814 img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
 815                                  const float s[QUAD_SIZE],
 816                                  const float t[QUAD_SIZE],
 817                                  const float p[QUAD_SIZE],
 818                                  float lodbias,
 819                                  float rgba[NUM_CHANNELS][QUAD_SIZE])
 820 {
 821    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 822    unsigned  j;
 823    unsigned level = samp->level;
 824    unsigned xpot = pot_level_size(samp->xpot, level);
 825    unsigned ypot = pot_level_size(samp->ypot, level);
 826    union tex_tile_address addr;
 827
 828    addr.value = 0;
 829    addr.bits.level = samp->level;
 830
 831    for (j = 0; j < QUAD_SIZE; j++) {
 832       int c;
 833
 834       float u = s[j] * xpot;
 835       float v = t[j] * ypot;
 836
 837       int uflr = util_ifloor(u);
 838       int vflr = util_ifloor(v);
 839
 840       int x0 = uflr & (xpot - 1);
 841       int y0 = vflr & (ypot - 1);
 842
 843       const float *out = get_texel_2d_no_border(samp, addr, x0, y0);
 844
 845       for (c = 0; c < 4; c++) {
 846          rgba[c][j] = out[c];
 847       }
 848    }
 849 }
 850
 851
 852 static INLINE void
 853 img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
 854                                 const float s[QUAD_SIZE],
 855                                 const float t[QUAD_SIZE],
 856                                 const float p[QUAD_SIZE],
 857                                 float lodbias,
 858                                 float rgba[NUM_CHANNELS][QUAD_SIZE])
 859 {
 860    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 861    unsigned  j;
 862    unsigned level = samp->level;
 863    unsigned xpot = pot_level_size(samp->xpot, level);
 864    unsigned ypot = pot_level_size(samp->ypot, level);
 865    union tex_tile_address addr;
 866
 867    addr.value = 0;
 868    addr.bits.level = samp->level;
 869
 870    for (j = 0; j < QUAD_SIZE; j++) {
 871       int c;
 872
 873       float u = s[j] * xpot;
 874       float v = t[j] * ypot;
 875
 876       int x0, y0;
 877       const float *out;
 878
 879       x0 = util_ifloor(u);
 880       if (x0 < 0)
 881          x0 = 0;
 882       else if (x0 > xpot - 1)
 883          x0 = xpot - 1;
 884
 885       y0 = util_ifloor(v);
 886       if (y0 < 0)
 887          y0 = 0;
 888       else if (y0 > ypot - 1)
 889          y0 = ypot - 1;
 890
 891       out = get_texel_2d_no_border(samp, addr, x0, y0);
 892
 893       for (c = 0; c < 4; c++) {
 894          rgba[c][j] = out[c];
 895       }
 896    }
 897 }
 898
 899 static void
 900 img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
 901                         const float s[QUAD_SIZE],
 902                         const float t[QUAD_SIZE],
 903                         const float p[QUAD_SIZE],
 904                         float lodbias,
 905                         float rgba[NUM_CHANNELS][QUAD_SIZE])
 906 {
 907    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 908    const struct pipe_texture *texture = samp->texture;
 909    unsigned level0, j;
 910    int width;
 911    int x[4];
 912    union tex_tile_address addr;
 913
 914    level0 = samp->level;
 915    width = texture->width[level0];
 916
 917    assert(width > 0);
 918
 919    addr.value = 0;
 920    addr.bits.level = samp->level;
 921
 922    samp->nearest_texcoord_s(s, width, x);
 923
 924    for (j = 0; j < QUAD_SIZE; j++) {
 925       const float *out = get_texel_2d(samp, addr, x[j], 0);
 926       int c;
 927       for (c = 0; c < 4; c++) {
 928          rgba[c][j] = out[c];
 929       }
 930    }
 931 }
 932
 933
 934 static void
 935 img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
 936                       const float s[QUAD_SIZE],
 937                       const float t[QUAD_SIZE],
 938                       const float p[QUAD_SIZE],
 939                       float lodbias,
 940                       float rgba[NUM_CHANNELS][QUAD_SIZE])
 941 {
 942    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 943    const struct pipe_texture *texture = samp->texture;
 944    unsigned level0, j;
 945    int width, height;
 946    int x[4], y[4];
 947    union tex_tile_address addr;
 948
 949
 950    level0 = samp->level;
 951    width = texture->width[level0];
 952    height = texture->height[level0];
 953
 954    assert(width > 0);
 955    assert(height > 0);
 956
 957    addr.value = 0;
 958    addr.bits.level = samp->level;
 959
 960    samp->nearest_texcoord_s(s, width, x);
 961    samp->nearest_texcoord_t(t, height, y);
 962
 963    for (j = 0; j < QUAD_SIZE; j++) {
 964       const float *out = get_texel_2d(samp, addr, x[j], y[j]);
 965       int c;
 966       for (c = 0; c < 4; c++) {
 967          rgba[c][j] = out[c];
 968       }
 969    }
 970 }
 971
 972 static inline union tex_tile_address face( union tex_tile_address addr,
 973                                            unsigned face )
 974 {
 975    addr.bits.face = face;
 976    return addr;
 977 }
 978
 979 static void
 980 img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler,
 981                       const float s[QUAD_SIZE],
 982                       const float t[QUAD_SIZE],
 983                       const float p[QUAD_SIZE],
 984                       float lodbias,
 985                       float rgba[NUM_CHANNELS][QUAD_SIZE])
 986 {
 987    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 988    const struct pipe_texture *texture = samp->texture;
 989    const unsigned *faces = samp->faces; /* zero when not cube-mapping */
 990    unsigned level0, j;
 991    int width, height;
 992    int x[4], y[4];
 993    union tex_tile_address addr;
 994
 995
 996    level0 = samp->level;
 997    width = texture->width[level0];
 998    height = texture->height[level0];
 999
1000    assert(width > 0);
1001    assert(height > 0);
1002
1003    addr.value = 0;
1004    addr.bits.level = samp->level;
1005
1006    samp->nearest_texcoord_s(s, width, x);
1007    samp->nearest_texcoord_t(t, height, y);
1008
1009    for (j = 0; j < QUAD_SIZE; j++) {
1010       const float *out = get_texel_2d(samp, face(addr, faces[j]), x[j], y[j]);
1011       int c;
1012       for (c = 0; c < 4; c++) {
1013          rgba[c][j] = out[c];
1014       }
1015    }
1016 }
1017
1018
1019 static void
1020 img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler,
1021                       const float s[QUAD_SIZE],
1022                       const float t[QUAD_SIZE],
1023                       const float p[QUAD_SIZE],
1024                       float lodbias,
1025                       float rgba[NUM_CHANNELS][QUAD_SIZE])
1026 {
1027    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1028    const struct pipe_texture *texture = samp->texture;
1029    unsigned level0, j;
1030    int width, height, depth;
1031    int x[4], y[4], z[4];
1032    union tex_tile_address addr;
1033
1034    level0 = samp->level;
1035    width = texture->width[level0];
1036    height = texture->height[level0];
1037    depth = texture->depth[level0];
1038
1039    assert(width > 0);
1040    assert(height > 0);
1041    assert(depth > 0);
1042
1043    samp->nearest_texcoord_s(s, width,  x);
1044    samp->nearest_texcoord_t(t, height, y);
1045    samp->nearest_texcoord_p(p, depth,  z);
1046
1047    addr.value = 0;
1048    addr.bits.level = samp->level;
1049
1050    for (j = 0; j < QUAD_SIZE; j++) {
1051       const float *out = get_texel_3d(samp, addr, x[j], y[j], z[j]);
1052       int c;
1053       for (c = 0; c < 4; c++) {
1054          rgba[c][j] = out[c];
1055       }
1056    }
1057 }
1058
1059
1060 static void
1061 img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
1062                      const float s[QUAD_SIZE],
1063                      const float t[QUAD_SIZE],
1064                      const float p[QUAD_SIZE],
1065                      float lodbias,
1066                      float rgba[NUM_CHANNELS][QUAD_SIZE])
1067 {
1068    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1069    const struct pipe_texture *texture = samp->texture;
1070    unsigned level0, j;
1071    int width;
1072    int x0[4], x1[4];
1073    float xw[4]; /* weights */
1074    union tex_tile_address addr;
1075
1076
1077    level0 = samp->level;
1078    width = texture->width[level0];
1079
1080    assert(width > 0);
1081
1082    addr.value = 0;
1083    addr.bits.level = samp->level;
1084
1085    samp->linear_texcoord_s(s, width, x0, x1, xw);
1086
1087
1088    for (j = 0; j < QUAD_SIZE; j++) {
1089       const float *tx0 = get_texel_2d(samp, addr, x0[j], 0);
1090       const float *tx1 = get_texel_2d(samp, addr, x1[j], 0);
1091       int c;
1092
1093       /* interpolate R, G, B, A */
1094       for (c = 0; c < 4; c++) {
1095          rgba[c][j] = lerp(xw[j], tx0[c], tx1[c]);
1096       }
1097    }
1098 }
1099
1100
1101 static void
1102 img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
1103                      const float s[QUAD_SIZE],
1104                      const float t[QUAD_SIZE],
1105                      const float p[QUAD_SIZE],
1106                      float lodbias,
1107                      float rgba[NUM_CHANNELS][QUAD_SIZE])
1108 {
1109    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1110    const struct pipe_texture *texture = samp->texture;
1111    unsigned level0, j;
1112    int width, height;
1113    int x0[4], y0[4], x1[4], y1[4];
1114    float xw[4], yw[4]; /* weights */
1115    union tex_tile_address addr;
1116
1117
1118    level0 = samp->level;
1119    width = texture->width[level0];
1120    height = texture->height[level0];
1121
1122    assert(width > 0);
1123    assert(height > 0);
1124
1125    addr.value = 0;
1126    addr.bits.level = samp->level;
1127
1128    samp->linear_texcoord_s(s, width,  x0, x1, xw);
1129    samp->linear_texcoord_t(t, height, y0, y1, yw);
1130
1131    for (j = 0; j < QUAD_SIZE; j++) {
1132       const float *tx0 = get_texel_2d(samp, addr, x0[j], y0[j]);
1133       const float *tx1 = get_texel_2d(samp, addr, x1[j], y0[j]);
1134       const float *tx2 = get_texel_2d(samp, addr, x0[j], y1[j]);
1135       const float *tx3 = get_texel_2d(samp, addr, x1[j], y1[j]);
1136       int c;
1137
1138       /* interpolate R, G, B, A */
1139       for (c = 0; c < 4; c++) {
1140          rgba[c][j] = lerp_2d(xw[j], yw[j],
1141                               tx0[c], tx1[c],
1142                               tx2[c], tx3[c]);
1143       }
1144    }
1145 }
1146
1147
1148 static void
1149 img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
1150                      const float s[QUAD_SIZE],
1151                      const float t[QUAD_SIZE],
1152                      const float p[QUAD_SIZE],
1153                      float lodbias,
1154                      float rgba[NUM_CHANNELS][QUAD_SIZE])
1155 {
1156    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1157    const struct pipe_texture *texture = samp->texture;
1158    const unsigned *faces = samp->faces; /* zero when not cube-mapping */
1159    unsigned level0, j;
1160    int width, height;
1161    int x0[4], y0[4], x1[4], y1[4];
1162    float xw[4], yw[4]; /* weights */
1163    union tex_tile_address addr;
1164
1165
1166    level0 = samp->level;
1167    width = texture->width[level0];
1168    height = texture->height[level0];
1169
1170    assert(width > 0);
1171    assert(height > 0);
1172
1173    addr.value = 0;
1174    addr.bits.level = samp->level;
1175
1176    samp->linear_texcoord_s(s, width,  x0, x1, xw);
1177    samp->linear_texcoord_t(t, height, y0, y1, yw);
1178
1179    for (j = 0; j < QUAD_SIZE; j++) {
1180       union tex_tile_address addrj = face(addr, faces[j]);
1181       const float *tx0 = get_texel_2d(samp, addrj, x0[j], y0[j]);
1182       const float *tx1 = get_texel_2d(samp, addrj, x1[j], y0[j]);
1183       const float *tx2 = get_texel_2d(samp, addrj, x0[j], y1[j]);
1184       const float *tx3 = get_texel_2d(samp, addrj, x1[j], y1[j]);
1185       int c;
1186
1187       /* interpolate R, G, B, A */
1188       for (c = 0; c < 4; c++) {
1189          rgba[c][j] = lerp_2d(xw[j], yw[j],
1190                               tx0[c], tx1[c],
1191                               tx2[c], tx3[c]);
1192       }
1193    }
1194 }
1195
1196
1197 static void
1198 img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
1199                      const float s[QUAD_SIZE],
1200                      const float t[QUAD_SIZE],
1201                      const float p[QUAD_SIZE],
1202                      float lodbias,
1203                      float rgba[NUM_CHANNELS][QUAD_SIZE])
1204 {
1205    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1206    const struct pipe_texture *texture = samp->texture;
1207    unsigned level0, j;
1208    int width, height, depth;
1209    int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
1210    float xw[4], yw[4], zw[4]; /* interpolation weights */
1211    union tex_tile_address addr;
1212
1213    level0 = samp->level;
1214    width = texture->width[level0];
1215    height = texture->height[level0];
1216    depth = texture->depth[level0];
1217
1218    addr.value = 0;
1219    addr.bits.level = level0;
1220
1221    assert(width > 0);
1222    assert(height > 0);
1223    assert(depth > 0);
1224
1225    samp->linear_texcoord_s(s, width,  x0, x1, xw);
1226    samp->linear_texcoord_t(t, height, y0, y1, yw);
1227    samp->linear_texcoord_p(p, depth,  z0, z1, zw);
1228
1229    for (j = 0; j < QUAD_SIZE; j++) {
1230       int c;
1231
1232       const float *tx00 = get_texel_3d(samp, addr, x0[j], y0[j], z0[j]);
1233       const float *tx01 = get_texel_3d(samp, addr, x1[j], y0[j], z0[j]);
1234       const float *tx02 = get_texel_3d(samp, addr, x0[j], y1[j], z0[j]);
1235       const float *tx03 = get_texel_3d(samp, addr, x1[j], y1[j], z0[j]);
1236
1237       const float *tx10 = get_texel_3d(samp, addr, x0[j], y0[j], z1[j]);
1238       const float *tx11 = get_texel_3d(samp, addr, x1[j], y0[j], z1[j]);
1239       const float *tx12 = get_texel_3d(samp, addr, x0[j], y1[j], z1[j]);
1240       const float *tx13 = get_texel_3d(samp, addr, x1[j], y1[j], z1[j]);
1241
1242       /* interpolate R, G, B, A */
1243       for (c = 0; c < 4; c++) {
1244          rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j],
1245                               tx00[c], tx01[c],
1246                               tx02[c], tx03[c],
1247                               tx10[c], tx11[c],
1248                               tx12[c], tx13[c]);
1249       }
1250    }
1251 }
1252
1253
1254
1255
1256
1257
1258
1259 static void
1260 mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
1261                      const float s[QUAD_SIZE],
1262                      const float t[QUAD_SIZE],
1263                      const float p[QUAD_SIZE],
1264                      float lodbias,
1265                      float rgba[NUM_CHANNELS][QUAD_SIZE])
1266 {
1267    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1268    const struct pipe_texture *texture = samp->texture;
1269    int level0;
1270    float lambda;
1271
1272    lambda = samp->compute_lambda(samp, s, t, p, lodbias);
1273    level0 = (int)lambda;
1274
1275    if (lambda < 0.0) {
1276       samp->level = 0;
1277       samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1278    }
1279    else if (level0 >= texture->last_level) {
1280       samp->level = texture->last_level;
1281       samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1282    }
1283    else {
1284       float levelBlend = lambda - level0;
1285       float rgba0[4][4];
1286       float rgba1[4][4];
1287       int c,j;
1288
1289       samp->level = level0;
1290       samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba0 );
1291
1292       samp->level = level0+1;
1293       samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba1 );
1294
1295       for (j = 0; j < QUAD_SIZE; j++) {
1296          for (c = 0; c < 4; c++) {
1297             rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
1298          }
1299       }
1300    }
1301 }
1302
1303
1304
1305 static void
1306 mip_filter_nearest(struct tgsi_sampler *tgsi_sampler,
1307                    const float s[QUAD_SIZE],
1308                    const float t[QUAD_SIZE],
1309                    const float p[QUAD_SIZE],
1310                    float lodbias,
1311                    float rgba[NUM_CHANNELS][QUAD_SIZE])
1312 {
1313    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1314    const struct pipe_texture *texture = samp->texture;
1315    float lambda;
1316
1317    lambda = samp->compute_lambda(samp, s, t, p, lodbias);
1318
1319    if (lambda < 0.0) {
1320       samp->level = 0;
1321       samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1322    }
1323    else {
1324       samp->level = (int)(lambda + 0.5) ;
1325       samp->level = MIN2(samp->level, (int)texture->last_level);
1326       samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1327    }
1328
1329 #if 0
1330    printf("RGBA %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n",
1331           rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0],
1332           rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1],
1333           rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2],
1334           rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]);
1335 #endif
1336 }
1337
1338
1339 static void
1340 mip_filter_none(struct tgsi_sampler *tgsi_sampler,
1341                 const float s[QUAD_SIZE],
1342                 const float t[QUAD_SIZE],
1343                 const float p[QUAD_SIZE],
1344                 float lodbias,
1345                 float rgba[NUM_CHANNELS][QUAD_SIZE])
1346 {
1347    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1348    float lambda = samp->compute_lambda(samp, s, t, p, lodbias);
1349
1350    if (lambda < 0.0) {
1351       samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1352    }
1353    else {
1354       samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1355    }
1356 }
1357
1358
1359
1360 /* Specialized version of mip_filter_linear with hard-wired calls to
1361  * 2d lambda calculation and 2d_linear_repeat_POT img filters.
1362  */
1363 static void
1364 mip_filter_linear_2d_linear_repeat_POT(
1365    struct tgsi_sampler *tgsi_sampler,
1366    const float s[QUAD_SIZE],
1367    const float t[QUAD_SIZE],
1368    const float p[QUAD_SIZE],
1369    float lodbias,
1370    float rgba[NUM_CHANNELS][QUAD_SIZE])
1371 {
1372    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1373    const struct pipe_texture *texture = samp->texture;
1374    int level0;
1375    float lambda;
1376
1377    lambda = compute_lambda_2d(samp, s, t, p, lodbias);
1378    level0 = (int)lambda;
1379
1380    /* Catches both negative and large values of level0:
1381     */
1382    if ((unsigned)level0 >= texture->last_level) {
1383       if (level0 < 0)
1384          samp->level = 0;
1385       else
1386          samp->level = texture->last_level;
1387
1388       img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba );
1389    }
1390    else {
1391       float levelBlend = lambda - level0;
1392       float rgba0[4][4];
1393       float rgba1[4][4];
1394       int c,j;
1395
1396       samp->level = level0;
1397       img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba0 );
1398
1399       samp->level = level0+1;
1400       img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba1 );
1401
1402       for (j = 0; j < QUAD_SIZE; j++) {
1403          for (c = 0; c < 4; c++) {
1404             rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
1405          }
1406       }
1407    }
1408 }
1409
1410
1411
1412 /* Compare stage in the little sampling pipeline.
1413  */
1414 static void
1415 sample_compare(struct tgsi_sampler *tgsi_sampler,
1416                const float s[QUAD_SIZE],
1417                const float t[QUAD_SIZE],
1418                const float p[QUAD_SIZE],
1419                float lodbias,
1420                float rgba[NUM_CHANNELS][QUAD_SIZE])
1421 {
1422    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1423    const struct pipe_sampler_state *sampler = samp->sampler;
1424    int j, k0, k1, k2, k3;
1425    float val;
1426
1427    samp->mip_filter( tgsi_sampler, s, t, p, lodbias, rgba );
1428
1429
1430    /**
1431     * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
1432     * When we sampled the depth texture, the depth value was put into all
1433     * RGBA channels.  We look at the red channel here.
1434     */
1435
1436    /* compare four texcoords vs. four texture samples */
1437    switch (sampler->compare_func) {
1438    case PIPE_FUNC_LESS:
1439       k0 = p[0] < rgba[0][0];
1440       k1 = p[1] < rgba[0][1];
1441       k2 = p[2] < rgba[0][2];
1442       k3 = p[3] < rgba[0][3];
1443       break;
1444    case PIPE_FUNC_LEQUAL:
1445       k0 = p[0] <= rgba[0][0];
1446       k1 = p[1] <= rgba[0][1];
1447       k2 = p[2] <= rgba[0][2];
1448       k3 = p[3] <= rgba[0][3];
1449       break;
1450    case PIPE_FUNC_GREATER:
1451       k0 = p[0] > rgba[0][0];
1452       k1 = p[1] > rgba[0][1];
1453       k2 = p[2] > rgba[0][2];
1454       k3 = p[3] > rgba[0][3];
1455       break;
1456    case PIPE_FUNC_GEQUAL:
1457       k0 = p[0] >= rgba[0][0];
1458       k1 = p[1] >= rgba[0][1];
1459       k2 = p[2] >= rgba[0][2];
1460       k3 = p[3] >= rgba[0][3];
1461       break;
1462    case PIPE_FUNC_EQUAL:
1463       k0 = p[0] == rgba[0][0];
1464       k1 = p[1] == rgba[0][1];
1465       k2 = p[2] == rgba[0][2];
1466       k3 = p[3] == rgba[0][3];
1467       break;
1468    case PIPE_FUNC_NOTEQUAL:
1469       k0 = p[0] != rgba[0][0];
1470       k1 = p[1] != rgba[0][1];
1471       k2 = p[2] != rgba[0][2];
1472       k3 = p[3] != rgba[0][3];
1473       break;
1474    case PIPE_FUNC_ALWAYS:
1475       k0 = k1 = k2 = k3 = 1;
1476       break;
1477    case PIPE_FUNC_NEVER:
1478       k0 = k1 = k2 = k3 = 0;
1479       break;
1480    default:
1481       k0 = k1 = k2 = k3 = 0;
1482       assert(0);
1483       break;
1484    }
1485
1486    /* convert four pass/fail values to an intensity in [0,1] */
1487    val = 0.25F * (k0 + k1 + k2 + k3);
1488
1489    /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
1490    for (j = 0; j < 4; j++) {
1491       rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
1492       rgba[3][j] = 1.0F;
1493    }
1494 }
1495
1496 /* Calculate cube faces.
1497  */
1498 static void
1499 sample_cube(struct tgsi_sampler *tgsi_sampler,
1500             const float s[QUAD_SIZE],
1501             const float t[QUAD_SIZE],
1502             const float p[QUAD_SIZE],
1503             float lodbias,
1504             float rgba[NUM_CHANNELS][QUAD_SIZE])
1505 {
1506    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1507    unsigned j;
1508    float ssss[4], tttt[4];
1509
1510    /*
1511      major axis
1512      direction     target                             sc     tc    ma
1513      ----------    -------------------------------    ---    ---   ---
1514      +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
1515      -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
1516      +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
1517      -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
1518      +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
1519      -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
1520    */
1521    for (j = 0; j < QUAD_SIZE; j++) {
1522       float rx = s[j];
1523       float ry = t[j];
1524       float rz = p[j];
1525       const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
1526       unsigned face;
1527       float sc, tc, ma;
1528
1529       if (arx > ary && arx > arz) {
1530          if (rx >= 0.0F) {
1531             face = PIPE_TEX_FACE_POS_X;
1532             sc = -rz;
1533             tc = -ry;
1534             ma = arx;
1535          }
1536          else {
1537             face = PIPE_TEX_FACE_NEG_X;
1538             sc = rz;
1539             tc = -ry;
1540             ma = arx;
1541          }
1542       }
1543       else if (ary > arx && ary > arz) {
1544          if (ry >= 0.0F) {
1545             face = PIPE_TEX_FACE_POS_Y;
1546             sc = rx;
1547             tc = rz;
1548             ma = ary;
1549          }
1550          else {
1551             face = PIPE_TEX_FACE_NEG_Y;
1552             sc = rx;
1553             tc = -rz;
1554             ma = ary;
1555          }
1556       }
1557       else {
1558          if (rz > 0.0F) {
1559             face = PIPE_TEX_FACE_POS_Z;
1560             sc = rx;
1561             tc = -ry;
1562             ma = arz;
1563          }
1564          else {
1565             face = PIPE_TEX_FACE_NEG_Z;
1566             sc = -rx;
1567             tc = -ry;
1568             ma = arz;
1569          }
1570       }
1571
1572       {
1573          const float ima = 1.0 / ma;
1574          ssss[j] = ( sc * ima + 1.0F ) * 0.5F;
1575          tttt[j] = ( tc * ima + 1.0F ) * 0.5F;
1576          samp->faces[j] = face;
1577       }
1578    }
1579
1580    /* In our little pipeline, the compare stage is next.  If compare
1581     * is not active, this will point somewhere deeper into the
1582     * pipeline, eg. to mip_filter or even img_filter.
1583     */
1584    samp->compare(tgsi_sampler, ssss, tttt, NULL, lodbias, rgba);
1585 }
1586
1587
1588
1589
1590 static wrap_nearest_func get_nearest_unorm_wrap( unsigned mode )
1591 {
1592    switch (mode) {
1593    case PIPE_TEX_WRAP_CLAMP:
1594       return wrap_nearest_unorm_clamp;
1595    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1596    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1597       return wrap_nearest_unorm_clamp_to_border;
1598    default:
1599       assert(0);
1600       return wrap_nearest_unorm_clamp;
1601    }
1602 }
1603
1604
1605 static wrap_nearest_func get_nearest_wrap( unsigned mode )
1606 {
1607    switch (mode) {
1608    case PIPE_TEX_WRAP_REPEAT:
1609       return wrap_nearest_repeat;
1610    case PIPE_TEX_WRAP_CLAMP:
1611       return wrap_nearest_clamp;
1612    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1613       return wrap_nearest_clamp_to_edge;
1614    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1615       return wrap_nearest_clamp_to_border;
1616    case PIPE_TEX_WRAP_MIRROR_REPEAT:
1617       return wrap_nearest_mirror_repeat;
1618    case PIPE_TEX_WRAP_MIRROR_CLAMP:
1619       return wrap_nearest_mirror_clamp;
1620    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1621       return wrap_nearest_mirror_clamp_to_edge;
1622    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1623       return wrap_nearest_mirror_clamp_to_border;
1624    default:
1625       assert(0);
1626       return wrap_nearest_repeat;
1627    }
1628 }
1629
1630 static wrap_linear_func get_linear_unorm_wrap( unsigned mode )
1631 {
1632    switch (mode) {
1633    case PIPE_TEX_WRAP_CLAMP:
1634       return wrap_linear_unorm_clamp;
1635    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1636    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1637       return wrap_linear_unorm_clamp_to_border;
1638    default:
1639       assert(0);
1640       return wrap_linear_unorm_clamp;
1641    }
1642 }
1643
1644 static wrap_linear_func get_linear_wrap( unsigned mode )
1645 {
1646    switch (mode) {
1647    case PIPE_TEX_WRAP_REPEAT:
1648       return wrap_linear_repeat;
1649    case PIPE_TEX_WRAP_CLAMP:
1650       return wrap_linear_clamp;
1651    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1652       return wrap_linear_clamp_to_edge;
1653    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1654       return wrap_linear_clamp_to_border;
1655    case PIPE_TEX_WRAP_MIRROR_REPEAT:
1656       return wrap_linear_mirror_repeat;
1657    case PIPE_TEX_WRAP_MIRROR_CLAMP:
1658       return wrap_linear_mirror_clamp;
1659    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1660       return wrap_linear_mirror_clamp_to_edge;
1661    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1662       return wrap_linear_mirror_clamp_to_border;
1663    default:
1664       assert(0);
1665       return wrap_linear_repeat;
1666    }
1667 }
1668
1669 static compute_lambda_func get_lambda_func( const union sp_sampler_key key )
1670 {
1671    if (key.bits.processor == TGSI_PROCESSOR_VERTEX)
1672       return compute_lambda_vert;
1673
1674    switch (key.bits.target) {
1675    case PIPE_TEXTURE_1D:
1676       return compute_lambda_1d;
1677    case PIPE_TEXTURE_2D:
1678    case PIPE_TEXTURE_CUBE:
1679       return compute_lambda_2d;
1680    case PIPE_TEXTURE_3D:
1681       return compute_lambda_3d;
1682    default:
1683       assert(0);
1684       return compute_lambda_1d;
1685    }
1686 }
1687
1688 static filter_func get_img_filter( const union sp_sampler_key key,
1689                                    unsigned filter,
1690                                    const struct pipe_sampler_state *sampler )
1691 {
1692    switch (key.bits.target) {
1693    case PIPE_TEXTURE_1D:
1694       if (filter == PIPE_TEX_FILTER_NEAREST)
1695          return img_filter_1d_nearest;
1696       else
1697          return img_filter_1d_linear;
1698       break;
1699    case PIPE_TEXTURE_2D:
1700       /* Try for fast path:
1701        */
1702       if (key.bits.is_pot &&
1703           sampler->wrap_s == sampler->wrap_t &&
1704           sampler->normalized_coords)
1705       {
1706          switch (sampler->wrap_s) {
1707          case PIPE_TEX_WRAP_REPEAT:
1708             switch (filter) {
1709             case PIPE_TEX_FILTER_NEAREST:
1710                return img_filter_2d_nearest_repeat_POT;
1711             case PIPE_TEX_FILTER_LINEAR:
1712                return img_filter_2d_linear_repeat_POT;
1713             default:
1714                break;
1715             }
1716             break;
1717          case PIPE_TEX_WRAP_CLAMP:
1718             switch (filter) {
1719             case PIPE_TEX_FILTER_NEAREST:
1720                return img_filter_2d_nearest_clamp_POT;
1721             default:
1722                break;
1723             }
1724          }
1725       }
1726       /* Otherwise use default versions:
1727        */
1728       if (filter == PIPE_TEX_FILTER_NEAREST)
1729          return img_filter_2d_nearest;
1730       else
1731          return img_filter_2d_linear;
1732       break;
1733    case PIPE_TEXTURE_CUBE:
1734       if (filter == PIPE_TEX_FILTER_NEAREST)
1735          return img_filter_cube_nearest;
1736       else
1737          return img_filter_cube_linear;
1738       break;
1739    case PIPE_TEXTURE_3D:
1740       if (filter == PIPE_TEX_FILTER_NEAREST)
1741          return img_filter_3d_nearest;
1742       else
1743          return img_filter_3d_linear;
1744       break;
1745    default:
1746       assert(0);
1747       return img_filter_1d_nearest;
1748    }
1749 }
1750
1751
1752 /**
1753  * Bind the given texture object and texture cache to the sampler varient.
1754  */
1755 void
1756 sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp,
1757                                  struct softpipe_tex_tile_cache *tex_cache,
1758                                  const struct pipe_texture *texture )
1759 {
1760    const struct pipe_sampler_state *sampler = samp->sampler;
1761
1762    samp->texture = texture;
1763    samp->cache = tex_cache;
1764    samp->xpot = util_unsigned_logbase2( texture->width[0] );
1765    samp->ypot = util_unsigned_logbase2( texture->height[0] );
1766    samp->level = CLAMP((int) sampler->min_lod, 0, (int) texture->last_level);
1767 }
1768
1769
/**
 * Release a sampler varient.  Only the varient structure itself is
 * freed; the sampler state, texture and tile cache it points to are
 * not touched here.
 */
void
sp_sampler_varient_destroy( struct sp_sampler_varient *samp )
{
   FREE(samp);
}
1775
1776
1777 /* Create a sampler varient for a given set of non-orthogonal state.  Currently the
1778  */
1779 struct sp_sampler_varient *
1780 sp_create_sampler_varient( const struct pipe_sampler_state *sampler,
1781                            const union sp_sampler_key key )
1782 {
1783    struct sp_sampler_varient *samp = CALLOC_STRUCT(sp_sampler_varient);
1784    if (!samp)
1785       return NULL;
1786
1787    samp->sampler = sampler;
1788    samp->key = key;
1789
1790    /* Note that (for instance) linear_texcoord_s and
1791     * nearest_texcoord_s may be active at the same time, if the
1792     * sampler min_img_filter differs from its mag_img_filter.
1793     */
1794    if (sampler->normalized_coords) {
1795       samp->linear_texcoord_s = get_linear_wrap( sampler->wrap_s );
1796       samp->linear_texcoord_t = get_linear_wrap( sampler->wrap_t );
1797       samp->linear_texcoord_p = get_linear_wrap( sampler->wrap_r );
1798
1799       samp->nearest_texcoord_s = get_nearest_wrap( sampler->wrap_s );
1800       samp->nearest_texcoord_t = get_nearest_wrap( sampler->wrap_t );
1801       samp->nearest_texcoord_p = get_nearest_wrap( sampler->wrap_r );
1802    }
1803    else {
1804       samp->linear_texcoord_s = get_linear_unorm_wrap( sampler->wrap_s );
1805       samp->linear_texcoord_t = get_linear_unorm_wrap( sampler->wrap_t );
1806       samp->linear_texcoord_p = get_linear_unorm_wrap( sampler->wrap_r );
1807
1808       samp->nearest_texcoord_s = get_nearest_unorm_wrap( sampler->wrap_s );
1809       samp->nearest_texcoord_t = get_nearest_unorm_wrap( sampler->wrap_t );
1810       samp->nearest_texcoord_p = get_nearest_unorm_wrap( sampler->wrap_r );
1811    }
1812
1813    samp->compute_lambda = get_lambda_func( key );
1814
1815    samp->min_img_filter = get_img_filter(key, sampler->min_img_filter, sampler);
1816    samp->mag_img_filter = get_img_filter(key, sampler->mag_img_filter, sampler);
1817
1818    switch (sampler->min_mip_filter) {
1819    case PIPE_TEX_MIPFILTER_NONE:
1820       if (sampler->min_img_filter == sampler->mag_img_filter)
1821          samp->mip_filter = samp->min_img_filter;
1822       else
1823          samp->mip_filter = mip_filter_none;
1824       break;
1825
1826    case PIPE_TEX_MIPFILTER_NEAREST:
1827       samp->mip_filter = mip_filter_nearest;
1828       break;
1829
1830    case PIPE_TEX_MIPFILTER_LINEAR:
1831       if (key.bits.is_pot &&
1832           sampler->min_img_filter == sampler->mag_img_filter &&
1833           sampler->normalized_coords &&
1834           sampler->wrap_s == PIPE_TEX_WRAP_REPEAT &&
1835           sampler->wrap_t == PIPE_TEX_WRAP_REPEAT &&
1836           sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR)
1837       {
1838          samp->mip_filter = mip_filter_linear_2d_linear_repeat_POT;
1839       }
1840       else
1841       {
1842          samp->mip_filter = mip_filter_linear;
1843       }
1844       break;
1845    }
1846
1847    if (sampler->compare_mode != FALSE) {
1848       samp->compare = sample_compare;
1849    }
1850    else {
1851       /* Skip compare operation by promoting the mip_filter function
1852        * pointer:
1853        */
1854       samp->compare = samp->mip_filter;
1855    }
1856
1857    if (key.bits.target == PIPE_TEXTURE_CUBE) {
1858       samp->base.get_samples = sample_cube;
1859    }
1860    else {
1861       samp->faces[0] = 0;
1862       samp->faces[1] = 0;
1863       samp->faces[2] = 0;
1864       samp->faces[3] = 0;
1865
1866       /* Skip cube face determination by promoting the compare
1867        * function pointer:
1868        */
1869       samp->base.get_samples = samp->compare;
1870    }
1871
1872    return samp;
1873 }
1874
1875
1876
1877
1878
1879