src/gallium/drivers/softpipe/sp_tex_sample.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  * Copyright 2008-2010 VMware, Inc.  All rights reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the
   9  * "Software"), to deal in the Software without restriction, including
  10  * without limitation the rights to use, copy, modify, merge, publish,
  11  * distribute, sub license, and/or sell copies of the Software, and to
  12  * permit persons to whom the Software is furnished to do so, subject to
  13  * the following conditions:
  14  *
  15  * The above copyright notice and this permission notice (including the
  16  * next paragraph) shall be included in all copies or substantial portions
  17  * of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26  *
  27  **************************************************************************/
  28
  29 /**
  30  * Texture sampling
  31  *
  32  * Authors:
  33  *   Brian Paul
  34  *   Keith Whitwell
  35  */
  36
  37 #include "pipe/p_context.h"
  38 #include "pipe/p_defines.h"
  39 #include "pipe/p_shader_tokens.h"
  40 #include "util/u_math.h"
  41 #include "util/u_memory.h"
  42 #include "sp_quad.h"   /* only for #define QUAD_* tokens */
  43 #include "sp_tex_sample.h"
  44 #include "sp_tex_tile_cache.h"
  45
  46
  47
  48 /*
  49  * Return fractional part of 'f'.  Used for computing interpolation weights.
  50  * Need to be careful with negative values.
  51  * Note, if this function isn't perfect you'll sometimes see 1-pixel bands
  52  * of improperly weighted linear-filtered textures.
  53  * The tests/texwrap.c demo is a good test.
  54  */
  55 static INLINE float
  56 frac(float f)
  57 {
  58    return f - util_ifloor(f);
  59 }
  60
  61
  62
  63 /**
  64  * Linear interpolation macro
  65  */
  66 static INLINE float
  67 lerp(float a, float v0, float v1)
  68 {
  69    return v0 + a * (v1 - v0);
  70 }
  71
  72
  73 /**
  74  * Do 2D/biliner interpolation of float values.
  75  * v00, v10, v01 and v11 are typically four texture samples in a square/box.
  76  * a and b are the horizontal and vertical interpolants.
  77  * It's important that this function is inlined when compiled with
  78  * optimization!  If we find that's not true on some systems, convert
  79  * to a macro.
  80  */
  81 static INLINE float
  82 lerp_2d(float a, float b,
  83         float v00, float v10, float v01, float v11)
  84 {
  85    const float temp0 = lerp(a, v00, v10);
  86    const float temp1 = lerp(a, v01, v11);
  87    return lerp(b, temp0, temp1);
  88 }
  89
  90
  91 /**
  92  * As above, but 3D interpolation of 8 values.
  93  */
  94 static INLINE float
  95 lerp_3d(float a, float b, float c,
  96         float v000, float v100, float v010, float v110,
  97         float v001, float v101, float v011, float v111)
  98 {
  99    const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
 100    const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
 101    return lerp(c, temp0, temp1);
 102 }
 103
 104
 105
 106 /**
 107  * Compute coord % size for repeat wrap modes.
 108  * Note that if coord is a signed integer, coord % size doesn't give
 109  * the right value for coord < 0 (in terms of texture repeat).  Just
 110  * casting to unsigned fixes that.
 111  */
 112 static INLINE int
 113 repeat(int coord, unsigned size)
 114 {
 115    return (int) ((unsigned) coord % size);
 116 }
 117
 118
 119 /**
 120  * Apply texture coord wrapping mode and return integer texture indexes
 121  * for a vector of four texcoords (S or T or P).
 122  * \param wrapMode  PIPE_TEX_WRAP_x
 123  * \param s  the incoming texcoords
 124  * \param size  the texture image size
 125  * \param icoord  returns the integer texcoords
 126  * \return  integer texture index
 127  */
 128 static void
 129 wrap_nearest_repeat(const float s[4], unsigned size, int icoord[4])
 130 {
 131    uint ch;
 132    /* s limited to [0,1) */
 133    /* i limited to [0,size-1] */
 134    for (ch = 0; ch < 4; ch++) {
 135       int i = util_ifloor(s[ch] * size);
 136       icoord[ch] = repeat(i, size);
 137    }
 138 }
 139
 140
 141 static void
 142 wrap_nearest_clamp(const float s[4], unsigned size, int icoord[4])
 143 {
 144    uint ch;
 145    /* s limited to [0,1] */
 146    /* i limited to [0,size-1] */
 147    for (ch = 0; ch < 4; ch++) {
 148       if (s[ch] <= 0.0F)
 149          icoord[ch] = 0;
 150       else if (s[ch] >= 1.0F)
 151          icoord[ch] = size - 1;
 152       else
 153          icoord[ch] = util_ifloor(s[ch] * size);
 154    }
 155 }
 156
 157
 158 static void
 159 wrap_nearest_clamp_to_edge(const float s[4], unsigned size, int icoord[4])
 160 {
 161    uint ch;
 162    /* s limited to [min,max] */
 163    /* i limited to [0, size-1] */
 164    const float min = 1.0F / (2.0F * size);
 165    const float max = 1.0F - min;
 166    for (ch = 0; ch < 4; ch++) {
 167       if (s[ch] < min)
 168          icoord[ch] = 0;
 169       else if (s[ch] > max)
 170          icoord[ch] = size - 1;
 171       else
 172          icoord[ch] = util_ifloor(s[ch] * size);
 173    }
 174 }
 175
 176
 177 static void
 178 wrap_nearest_clamp_to_border(const float s[4], unsigned size, int icoord[4])
 179 {
 180    uint ch;
 181    /* s limited to [min,max] */
 182    /* i limited to [-1, size] */
 183    const float min = -1.0F / (2.0F * size);
 184    const float max = 1.0F - min;
 185    for (ch = 0; ch < 4; ch++) {
 186       if (s[ch] <= min)
 187          icoord[ch] = -1;
 188       else if (s[ch] >= max)
 189          icoord[ch] = size;
 190       else
 191          icoord[ch] = util_ifloor(s[ch] * size);
 192    }
 193 }
 194
 195
 196 static void
 197 wrap_nearest_mirror_repeat(const float s[4], unsigned size, int icoord[4])
 198 {
 199    uint ch;
 200    const float min = 1.0F / (2.0F * size);
 201    const float max = 1.0F - min;
 202    for (ch = 0; ch < 4; ch++) {
 203       const int flr = util_ifloor(s[ch]);
 204       float u;
 205       if (flr & 1)
 206          u = 1.0F - (s[ch] - (float) flr);
 207       else
 208          u = s[ch] - (float) flr;
 209       if (u < min)
 210          icoord[ch] = 0;
 211       else if (u > max)
 212          icoord[ch] = size - 1;
 213       else
 214          icoord[ch] = util_ifloor(u * size);
 215    }
 216 }
 217
 218
 219 static void
 220 wrap_nearest_mirror_clamp(const float s[4], unsigned size, int icoord[4])
 221 {
 222    uint ch;
 223    for (ch = 0; ch < 4; ch++) {
 224       /* s limited to [0,1] */
 225       /* i limited to [0,size-1] */
 226       const float u = fabsf(s[ch]);
 227       if (u <= 0.0F)
 228          icoord[ch] = 0;
 229       else if (u >= 1.0F)
 230          icoord[ch] = size - 1;
 231       else
 232          icoord[ch] = util_ifloor(u * size);
 233    }
 234 }
 235
 236
 237 static void
 238 wrap_nearest_mirror_clamp_to_edge(const float s[4], unsigned size,
 239                                   int icoord[4])
 240 {
 241    uint ch;
 242    /* s limited to [min,max] */
 243    /* i limited to [0, size-1] */
 244    const float min = 1.0F / (2.0F * size);
 245    const float max = 1.0F - min;
 246    for (ch = 0; ch < 4; ch++) {
 247       const float u = fabsf(s[ch]);
 248       if (u < min)
 249          icoord[ch] = 0;
 250       else if (u > max)
 251          icoord[ch] = size - 1;
 252       else
 253          icoord[ch] = util_ifloor(u * size);
 254    }
 255 }
 256
 257
 258 static void
 259 wrap_nearest_mirror_clamp_to_border(const float s[4], unsigned size,
 260                                     int icoord[4])
 261 {
 262    uint ch;
 263    /* s limited to [min,max] */
 264    /* i limited to [0, size-1] */
 265    const float min = -1.0F / (2.0F * size);
 266    const float max = 1.0F - min;
 267    for (ch = 0; ch < 4; ch++) {
 268       const float u = fabsf(s[ch]);
 269       if (u < min)
 270          icoord[ch] = -1;
 271       else if (u > max)
 272          icoord[ch] = size;
 273       else
 274          icoord[ch] = util_ifloor(u * size);
 275    }
 276 }
 277
 278
 279 /**
 280  * Used to compute texel locations for linear sampling for four texcoords.
 281  * \param wrapMode  PIPE_TEX_WRAP_x
 282  * \param s  the texcoords
 283  * \param size  the texture image size
 284  * \param icoord0  returns first texture indexes
 285  * \param icoord1  returns second texture indexes (usually icoord0 + 1)
 286  * \param w  returns blend factor/weight between texture indexes
 287  * \param icoord  returns the computed integer texture coords
 288  */
 289 static void
 290 wrap_linear_repeat(const float s[4], unsigned size,
 291                    int icoord0[4], int icoord1[4], float w[4])
 292 {
 293    uint ch;
 294    for (ch = 0; ch < 4; ch++) {
 295       float u = s[ch] * size - 0.5F;
 296       icoord0[ch] = repeat(util_ifloor(u), size);
 297       icoord1[ch] = repeat(icoord0[ch] + 1, size);
 298       w[ch] = frac(u);
 299    }
 300 }
 301
 302
 303 static void
 304 wrap_linear_clamp(const float s[4], unsigned size,
 305                   int icoord0[4], int icoord1[4], float w[4])
 306 {
 307    uint ch;
 308    for (ch = 0; ch < 4; ch++) {
 309       float u = CLAMP(s[ch], 0.0F, 1.0F);
 310       u = u * size - 0.5f;
 311       icoord0[ch] = util_ifloor(u);
 312       icoord1[ch] = icoord0[ch] + 1;
 313       w[ch] = frac(u);
 314    }
 315 }
 316
 317
 318 static void
 319 wrap_linear_clamp_to_edge(const float s[4], unsigned size,
 320                           int icoord0[4], int icoord1[4], float w[4])
 321 {
 322    uint ch;
 323    for (ch = 0; ch < 4; ch++) {
 324       float u = CLAMP(s[ch], 0.0F, 1.0F);
 325       u = u * size - 0.5f;
 326       icoord0[ch] = util_ifloor(u);
 327       icoord1[ch] = icoord0[ch] + 1;
 328       if (icoord0[ch] < 0)
 329          icoord0[ch] = 0;
 330       if (icoord1[ch] >= (int) size)
 331          icoord1[ch] = size - 1;
 332       w[ch] = frac(u);
 333    }
 334 }
 335
 336
 337 static void
 338 wrap_linear_clamp_to_border(const float s[4], unsigned size,
 339                             int icoord0[4], int icoord1[4], float w[4])
 340 {
 341    const float min = -1.0F / (2.0F * size);
 342    const float max = 1.0F - min;
 343    uint ch;
 344    for (ch = 0; ch < 4; ch++) {
 345       float u = CLAMP(s[ch], min, max);
 346       u = u * size - 0.5f;
 347       icoord0[ch] = util_ifloor(u);
 348       icoord1[ch] = icoord0[ch] + 1;
 349       w[ch] = frac(u);
 350    }
 351 }
 352
 353
 354 static void
 355 wrap_linear_mirror_repeat(const float s[4], unsigned size,
 356                           int icoord0[4], int icoord1[4], float w[4])
 357 {
 358    uint ch;
 359    for (ch = 0; ch < 4; ch++) {
 360       const int flr = util_ifloor(s[ch]);
 361       float u;
 362       if (flr & 1)
 363          u = 1.0F - (s[ch] - (float) flr);
 364       else
 365          u = s[ch] - (float) flr;
 366       u = u * size - 0.5F;
 367       icoord0[ch] = util_ifloor(u);
 368       icoord1[ch] = icoord0[ch] + 1;
 369       if (icoord0[ch] < 0)
 370          icoord0[ch] = 0;
 371       if (icoord1[ch] >= (int) size)
 372          icoord1[ch] = size - 1;
 373       w[ch] = frac(u);
 374    }
 375 }
 376
 377
 378 static void
 379 wrap_linear_mirror_clamp(const float s[4], unsigned size,
 380                          int icoord0[4], int icoord1[4], float w[4])
 381 {
 382    uint ch;
 383    for (ch = 0; ch < 4; ch++) {
 384       float u = fabsf(s[ch]);
 385       if (u >= 1.0F)
 386          u = (float) size;
 387       else
 388          u *= size;
 389       u -= 0.5F;
 390       icoord0[ch] = util_ifloor(u);
 391       icoord1[ch] = icoord0[ch] + 1;
 392       w[ch] = frac(u);
 393    }
 394 }
 395
 396
 397 static void
 398 wrap_linear_mirror_clamp_to_edge(const float s[4], unsigned size,
 399                                  int icoord0[4], int icoord1[4], float w[4])
 400 {
 401    uint ch;
 402    for (ch = 0; ch < 4; ch++) {
 403       float u = fabsf(s[ch]);
 404       if (u >= 1.0F)
 405          u = (float) size;
 406       else
 407          u *= size;
 408       u -= 0.5F;
 409       icoord0[ch] = util_ifloor(u);
 410       icoord1[ch] = icoord0[ch] + 1;
 411       if (icoord0[ch] < 0)
 412          icoord0[ch] = 0;
 413       if (icoord1[ch] >= (int) size)
 414          icoord1[ch] = size - 1;
 415       w[ch] = frac(u);
 416    }
 417 }
 418
 419
 420 static void
 421 wrap_linear_mirror_clamp_to_border(const float s[4], unsigned size,
 422                                    int icoord0[4], int icoord1[4], float w[4])
 423 {
 424    const float min = -1.0F / (2.0F * size);
 425    const float max = 1.0F - min;
 426    uint ch;
 427    for (ch = 0; ch < 4; ch++) {
 428       float u = fabsf(s[ch]);
 429       if (u <= min)
 430          u = min * size;
 431       else if (u >= max)
 432          u = max * size;
 433       else
 434          u *= size;
 435       u -= 0.5F;
 436       icoord0[ch] = util_ifloor(u);
 437       icoord1[ch] = icoord0[ch] + 1;
 438       w[ch] = frac(u);
 439    }
 440 }
 441
 442
 443 /**
 444  * For RECT textures / unnormalized texcoords
 445  * Only a subset of wrap modes supported.
 446  */
 447 static void
 448 wrap_nearest_unorm_clamp(const float s[4], unsigned size, int icoord[4])
 449 {
 450    uint ch;
 451    for (ch = 0; ch < 4; ch++) {
 452       int i = util_ifloor(s[ch]);
 453       icoord[ch]= CLAMP(i, 0, (int) size-1);
 454    }
 455 }
 456
 457
 458 /**
 459  * Handles clamp_to_edge and clamp_to_border:
 460  */
 461 static void
 462 wrap_nearest_unorm_clamp_to_border(const float s[4], unsigned size,
 463                                    int icoord[4])
 464 {
 465    uint ch;
 466    for (ch = 0; ch < 4; ch++) {
 467       icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) );
 468    }
 469 }
 470
 471
 472 /**
 473  * For RECT textures / unnormalized texcoords.
 474  * Only a subset of wrap modes supported.
 475  */
 476 static void
 477 wrap_linear_unorm_clamp(const float s[4], unsigned size,
 478                         int icoord0[4], int icoord1[4], float w[4])
 479 {
 480    uint ch;
 481    for (ch = 0; ch < 4; ch++) {
 482       /* Not exactly what the spec says, but it matches NVIDIA output */
 483       float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f);
 484       icoord0[ch] = util_ifloor(u);
 485       icoord1[ch] = icoord0[ch] + 1;
 486       w[ch] = frac(u);
 487    }
 488 }
 489
 490
 491 static void
 492 wrap_linear_unorm_clamp_to_border(const float s[4], unsigned size,
 493                                   int icoord0[4], int icoord1[4], float w[4])
 494 {
 495    uint ch;
 496    for (ch = 0; ch < 4; ch++) {
 497       float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F);
 498       u -= 0.5F;
 499       icoord0[ch] = util_ifloor(u);
 500       icoord1[ch] = icoord0[ch] + 1;
 501       if (icoord1[ch] > (int) size - 1)
 502          icoord1[ch] = size - 1;
 503       w[ch] = frac(u);
 504    }
 505 }
 506
 507
 508
 509 /**
 510  * Examine the quad's texture coordinates to compute the partial
 511  * derivatives w.r.t X and Y, then compute lambda (level of detail).
 512  */
 513 static float
 514 compute_lambda_1d(const struct sp_sampler_varient *samp,
 515                   const float s[QUAD_SIZE],
 516                   const float t[QUAD_SIZE],
 517                   const float p[QUAD_SIZE])
 518 {
 519    const struct pipe_texture *texture = samp->texture;
 520    const struct pipe_sampler_state *sampler = samp->sampler;
 521    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 522    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 523    float rho = MAX2(dsdx, dsdy) * texture->width0;
 524
 525    return util_fast_log2(rho);
 526 }
 527
 528
 529 static float
 530 compute_lambda_2d(const struct sp_sampler_varient *samp,
 531                   const float s[QUAD_SIZE],
 532                   const float t[QUAD_SIZE],
 533                   const float p[QUAD_SIZE])
 534 {
 535    const struct pipe_texture *texture = samp->texture;
 536    const struct pipe_sampler_state *sampler = samp->sampler;
 537    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 538    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 539    float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
 540    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
 541    float maxx = MAX2(dsdx, dsdy) * texture->width0;
 542    float maxy = MAX2(dtdx, dtdy) * texture->height0;
 543    float rho  = MAX2(maxx, maxy);
 544
 545    return util_fast_log2(rho);
 546 }
 547
 548
 549 static float
 550 compute_lambda_3d(const struct sp_sampler_varient *samp,
 551                   const float s[QUAD_SIZE],
 552                   const float t[QUAD_SIZE],
 553                   const float p[QUAD_SIZE])
 554 {
 555    const struct pipe_texture *texture = samp->texture;
 556    const struct pipe_sampler_state *sampler = samp->sampler;
 557    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 558    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 559    float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
 560    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
 561    float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
 562    float dpdy = fabsf(p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT]);
 563    float maxx = MAX2(dsdx, dsdy) * texture->width0;
 564    float maxy = MAX2(dtdx, dtdy) * texture->height0;
 565    float maxz = MAX2(dpdx, dpdy) * texture->depth0;
 566    float rho;
 567
 568    rho = MAX2(maxx, maxy);
 569    rho = MAX2(rho, maxz);
 570
 571    return util_fast_log2(rho);
 572 }
 573
 574
 575 /**
 576  * Compute lambda for a vertex texture sampler.
 577  * Since there aren't derivatives to use, just return 0.
 578  */
 579 static float
 580 compute_lambda_vert(const struct sp_sampler_varient *samp,
 581                     const float s[QUAD_SIZE],
 582                     const float t[QUAD_SIZE],
 583                     const float p[QUAD_SIZE])
 584 {
 585    return 0.0f;
 586 }
 587
 588
 589
 590 /**
 591  * Get a texel from a texture, using the texture tile cache.
 592  *
 593  * \param addr  the template tex address containing cube, z, face info.
 594  * \param x  the x coord of texel within 2D image
 595  * \param y  the y coord of texel within 2D image
 596  * \param rgba  the quad to put the texel/color into
 597  *
 598  * XXX maybe move this into sp_tex_tile_cache.c and merge with the
 599  * sp_get_cached_tile_tex() function.  Also, get 4 texels instead of 1...
 600  */
 601
 602
 603
 604
 605 static INLINE const float *
 606 get_texel_2d_no_border(const struct sp_sampler_varient *samp,
 607                        union tex_tile_address addr, int x, int y)
 608 {
 609    const struct softpipe_tex_cached_tile *tile;
 610
 611    addr.bits.x = x / TILE_SIZE;
 612    addr.bits.y = y / TILE_SIZE;
 613    y %= TILE_SIZE;
 614    x %= TILE_SIZE;
 615
 616    tile = sp_get_cached_tile_tex(samp->cache, addr);
 617
 618    return &tile->data.color[y][x][0];
 619 }
 620
 621
 622 static INLINE const float *
 623 get_texel_2d(const struct sp_sampler_varient *samp,
 624              union tex_tile_address addr, int x, int y)
 625 {
 626    const struct pipe_texture *texture = samp->texture;
 627    unsigned level = addr.bits.level;
 628
 629    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 630        y < 0 || y >= (int) u_minify(texture->height0, level)) {
 631       return samp->sampler->border_color;
 632    }
 633    else {
 634       return get_texel_2d_no_border( samp, addr, x, y );
 635    }
 636 }
 637
 638
 639 /* Gather a quad of adjacent texels within a tile:
 640  */
 641 static INLINE void
 642 get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_varient *samp,
 643                                         union tex_tile_address addr,
 644                                         unsigned x, unsigned y,
 645                                         const float *out[4])
 646 {
 647    const struct softpipe_tex_cached_tile *tile;
 648
 649    addr.bits.x = x / TILE_SIZE;
 650    addr.bits.y = y / TILE_SIZE;
 651    y %= TILE_SIZE;
 652    x %= TILE_SIZE;
 653
 654    tile = sp_get_cached_tile_tex(samp->cache, addr);
 655
 656    out[0] = &tile->data.color[y  ][x  ][0];
 657    out[1] = &tile->data.color[y  ][x+1][0];
 658    out[2] = &tile->data.color[y+1][x  ][0];
 659    out[3] = &tile->data.color[y+1][x+1][0];
 660 }
 661
 662
 663 /* Gather a quad of potentially non-adjacent texels:
 664  */
 665 static INLINE void
 666 get_texel_quad_2d_no_border(const struct sp_sampler_varient *samp,
 667                             union tex_tile_address addr,
 668                             int x0, int y0,
 669                             int x1, int y1,
 670                             const float *out[4])
 671 {
 672    out[0] = get_texel_2d_no_border( samp, addr, x0, y0 );
 673    out[1] = get_texel_2d_no_border( samp, addr, x1, y0 );
 674    out[2] = get_texel_2d_no_border( samp, addr, x0, y1 );
 675    out[3] = get_texel_2d_no_border( samp, addr, x1, y1 );
 676 }
 677
 678 /* Can involve a lot of unnecessary checks for border color:
 679  */
 680 static INLINE void
 681 get_texel_quad_2d(const struct sp_sampler_varient *samp,
 682                   union tex_tile_address addr,
 683                   int x0, int y0,
 684                   int x1, int y1,
 685                   const float *out[4])
 686 {
 687    out[0] = get_texel_2d( samp, addr, x0, y0 );
 688    out[1] = get_texel_2d( samp, addr, x1, y0 );
 689    out[3] = get_texel_2d( samp, addr, x1, y1 );
 690    out[2] = get_texel_2d( samp, addr, x0, y1 );
 691 }
 692
 693
 694
 695 /* 3d varients:
 696  */
 697 static INLINE const float *
 698 get_texel_3d_no_border(const struct sp_sampler_varient *samp,
 699                        union tex_tile_address addr, int x, int y, int z)
 700 {
 701    const struct softpipe_tex_cached_tile *tile;
 702
 703    addr.bits.x = x / TILE_SIZE;
 704    addr.bits.y = y / TILE_SIZE;
 705    addr.bits.z = z;
 706    y %= TILE_SIZE;
 707    x %= TILE_SIZE;
 708
 709    tile = sp_get_cached_tile_tex(samp->cache, addr);
 710
 711    return &tile->data.color[y][x][0];
 712 }
 713
 714
 715 static INLINE const float *
 716 get_texel_3d(const struct sp_sampler_varient *samp,
 717              union tex_tile_address addr, int x, int y, int z)
 718 {
 719    const struct pipe_texture *texture = samp->texture;
 720    unsigned level = addr.bits.level;
 721
 722    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 723        y < 0 || y >= (int) u_minify(texture->height0, level) ||
 724        z < 0 || z >= (int) u_minify(texture->depth0, level)) {
 725       return samp->sampler->border_color;
 726    }
 727    else {
 728       return get_texel_3d_no_border( samp, addr, x, y, z );
 729    }
 730 }
 731
 732
 733 /**
 734  * Given the logbase2 of a mipmap's base level size and a mipmap level,
 735  * return the size (in texels) of that mipmap level.
 736  * For example, if level[0].width = 256 then base_pot will be 8.
 737  * If level = 2, then we'll return 64 (the width at level=2).
 738  * Return 1 if level > base_pot.
 739  */
 740 static INLINE unsigned
 741 pot_level_size(unsigned base_pot, unsigned level)
 742 {
 743    return (base_pot >= level) ? (1 << (base_pot - level)) : 1;
 744 }
 745
 746
 747 /* Some image-filter fastpaths:
 748  */
 749 static INLINE void
 750 img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
 751                                 const float s[QUAD_SIZE],
 752                                 const float t[QUAD_SIZE],
 753                                 const float p[QUAD_SIZE],
 754                                 const float lodbias[QUAD_SIZE],
 755                                 float rgba[NUM_CHANNELS][QUAD_SIZE])
 756 {
 757    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 758    unsigned  j;
 759    unsigned level = samp->level;
 760    unsigned xpot = pot_level_size(samp->xpot, level);
 761    unsigned ypot = pot_level_size(samp->ypot, level);
 762    unsigned xmax = (xpot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, xpot) - 1; */
 763    unsigned ymax = (ypot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, ypot) - 1; */
 764    union tex_tile_address addr;
 765
 766    addr.value = 0;
 767    addr.bits.level = samp->level;
 768
 769    for (j = 0; j < QUAD_SIZE; j++) {
 770       int c;
 771
 772       float u = s[j] * xpot - 0.5F;
 773       float v = t[j] * ypot - 0.5F;
 774
 775       int uflr = util_ifloor(u);
 776       int vflr = util_ifloor(v);
 777
 778       float xw = u - (float)uflr;
 779       float yw = v - (float)vflr;
 780
 781       int x0 = uflr & (xpot - 1);
 782       int y0 = vflr & (ypot - 1);
 783
 784       const float *tx[4];
 785
 786       /* Can we fetch all four at once:
 787        */
 788       if (x0 < xmax && y0 < ymax) {
 789          get_texel_quad_2d_no_border_single_tile(samp, addr, x0, y0, tx);
 790       }
 791       else {
 792          unsigned x1 = (x0 + 1) & (xpot - 1);
 793          unsigned y1 = (y0 + 1) & (ypot - 1);
 794          get_texel_quad_2d_no_border(samp, addr, x0, y0, x1, y1, tx);
 795       }
 796
 797       /* interpolate R, G, B, A */
 798       for (c = 0; c < 4; c++) {
 799          rgba[c][j] = lerp_2d(xw, yw,
 800                               tx[0][c], tx[1][c],
 801                               tx[2][c], tx[3][c]);
 802       }
 803    }
 804 }
 805
 806
 807 static INLINE void
 808 img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
 809                                  const float s[QUAD_SIZE],
 810                                  const float t[QUAD_SIZE],
 811                                  const float p[QUAD_SIZE],
 812                                  const float lodbias[QUAD_SIZE],
 813                                  float rgba[NUM_CHANNELS][QUAD_SIZE])
 814 {
 815    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 816    unsigned  j;
 817    unsigned level = samp->level;
 818    unsigned xpot = pot_level_size(samp->xpot, level);
 819    unsigned ypot = pot_level_size(samp->ypot, level);
 820    union tex_tile_address addr;
 821
 822    addr.value = 0;
 823    addr.bits.level = samp->level;
 824
 825    for (j = 0; j < QUAD_SIZE; j++) {
 826       int c;
 827
 828       float u = s[j] * xpot;
 829       float v = t[j] * ypot;
 830
 831       int uflr = util_ifloor(u);
 832       int vflr = util_ifloor(v);
 833
 834       int x0 = uflr & (xpot - 1);
 835       int y0 = vflr & (ypot - 1);
 836
 837       const float *out = get_texel_2d_no_border(samp, addr, x0, y0);
 838
 839       for (c = 0; c < 4; c++) {
 840          rgba[c][j] = out[c];
 841       }
 842    }
 843 }
 844
 845
 846 static INLINE void
 847 img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
 848                                 const float s[QUAD_SIZE],
 849                                 const float t[QUAD_SIZE],
 850                                 const float p[QUAD_SIZE],
 851                                 const float lodbias[QUAD_SIZE],
 852                                 float rgba[NUM_CHANNELS][QUAD_SIZE])
 853 {
 854    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 855    unsigned  j;
 856    unsigned level = samp->level;
 857    unsigned xpot = pot_level_size(samp->xpot, level);
 858    unsigned ypot = pot_level_size(samp->ypot, level);
 859    union tex_tile_address addr;
 860
 861    addr.value = 0;
 862    addr.bits.level = samp->level;
 863
 864    for (j = 0; j < QUAD_SIZE; j++) {
 865       int c;
 866
 867       float u = s[j] * xpot;
 868       float v = t[j] * ypot;
 869
 870       int x0, y0;
 871       const float *out;
 872
 873       x0 = util_ifloor(u);
 874       if (x0 < 0)
 875          x0 = 0;
 876       else if (x0 > xpot - 1)
 877          x0 = xpot - 1;
 878
 879       y0 = util_ifloor(v);
 880       if (y0 < 0)
 881          y0 = 0;
 882       else if (y0 > ypot - 1)
 883          y0 = ypot - 1;
 884
 885       out = get_texel_2d_no_border(samp, addr, x0, y0);
 886
 887       for (c = 0; c < 4; c++) {
 888          rgba[c][j] = out[c];
 889       }
 890    }
 891 }
 892
 893
 894 static void
 895 img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
 896                         const float s[QUAD_SIZE],
 897                         const float t[QUAD_SIZE],
 898                         const float p[QUAD_SIZE],
 899                         const float lodbias[QUAD_SIZE],
 900                         float rgba[NUM_CHANNELS][QUAD_SIZE])
 901 {
 902    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 903    const struct pipe_texture *texture = samp->texture;
 904    unsigned level0, j;
 905    int width;
 906    int x[4];
 907    union tex_tile_address addr;
 908
 909    level0 = samp->level;
 910    width = u_minify(texture->width0, level0);
 911
 912    assert(width > 0);
 913
 914    addr.value = 0;
 915    addr.bits.level = samp->level;
 916
 917    samp->nearest_texcoord_s(s, width, x);
 918
 919    for (j = 0; j < QUAD_SIZE; j++) {
 920       const float *out = get_texel_2d(samp, addr, x[j], 0);
 921       int c;
 922       for (c = 0; c < 4; c++) {
 923          rgba[c][j] = out[c];
 924       }
 925    }
 926 }
 927
 928
 929 static void
 930 img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
 931                       const float s[QUAD_SIZE],
 932                       const float t[QUAD_SIZE],
 933                       const float p[QUAD_SIZE],
 934                       const float lodbias[QUAD_SIZE],
 935                       float rgba[NUM_CHANNELS][QUAD_SIZE])
 936 {
 937    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 938    const struct pipe_texture *texture = samp->texture;
 939    unsigned level0, j;
 940    int width, height;
 941    int x[4], y[4];
 942    union tex_tile_address addr;
 943
 944
 945    level0 = samp->level;
 946    width = u_minify(texture->width0, level0);
 947    height = u_minify(texture->height0, level0);
 948
 949    assert(width > 0);
 950    assert(height > 0);
 951
 952    addr.value = 0;
 953    addr.bits.level = samp->level;
 954
 955    samp->nearest_texcoord_s(s, width, x);
 956    samp->nearest_texcoord_t(t, height, y);
 957
 958    for (j = 0; j < QUAD_SIZE; j++) {
 959       const float *out = get_texel_2d(samp, addr, x[j], y[j]);
 960       int c;
 961       for (c = 0; c < 4; c++) {
 962          rgba[c][j] = out[c];
 963       }
 964    }
 965 }
 966
 967
 968 static INLINE union tex_tile_address
 969 face(union tex_tile_address addr, unsigned face )
 970 {
 971    addr.bits.face = face;
 972    return addr;
 973 }
 974
 975
 976 static void
 977 img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler,
 978                         const float s[QUAD_SIZE],
 979                         const float t[QUAD_SIZE],
 980                         const float p[QUAD_SIZE],
 981                         const float lodbias[QUAD_SIZE],
 982                         float rgba[NUM_CHANNELS][QUAD_SIZE])
 983 {
 984    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
 985    const struct pipe_texture *texture = samp->texture;
 986    const unsigned *faces = samp->faces; /* zero when not cube-mapping */
 987    unsigned level0, j;
 988    int width, height;
 989    int x[4], y[4];
 990    union tex_tile_address addr;
 991
 992    level0 = samp->level;
 993    width = u_minify(texture->width0, level0);
 994    height = u_minify(texture->height0, level0);
 995
 996    assert(width > 0);
 997    assert(height > 0);
 998
 999    addr.value = 0;
1000    addr.bits.level = samp->level;
1001
1002    samp->nearest_texcoord_s(s, width, x);
1003    samp->nearest_texcoord_t(t, height, y);
1004
1005    for (j = 0; j < QUAD_SIZE; j++) {
1006       const float *out = get_texel_2d(samp, face(addr, faces[j]), x[j], y[j]);
1007       int c;
1008       for (c = 0; c < 4; c++) {
1009          rgba[c][j] = out[c];
1010       }
1011    }
1012 }
1013
1014
1015 static void
1016 img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler,
1017                       const float s[QUAD_SIZE],
1018                       const float t[QUAD_SIZE],
1019                       const float p[QUAD_SIZE],
1020                       const float lodbias[QUAD_SIZE],
1021                       float rgba[NUM_CHANNELS][QUAD_SIZE])
1022 {
1023    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1024    const struct pipe_texture *texture = samp->texture;
1025    unsigned level0, j;
1026    int width, height, depth;
1027    int x[4], y[4], z[4];
1028    union tex_tile_address addr;
1029
1030    level0 = samp->level;
1031    width = u_minify(texture->width0, level0);
1032    height = u_minify(texture->height0, level0);
1033    depth = u_minify(texture->depth0, level0);
1034
1035    assert(width > 0);
1036    assert(height > 0);
1037    assert(depth > 0);
1038
1039    samp->nearest_texcoord_s(s, width,  x);
1040    samp->nearest_texcoord_t(t, height, y);
1041    samp->nearest_texcoord_p(p, depth,  z);
1042
1043    addr.value = 0;
1044    addr.bits.level = samp->level;
1045
1046    for (j = 0; j < QUAD_SIZE; j++) {
1047       const float *out = get_texel_3d(samp, addr, x[j], y[j], z[j]);
1048       int c;
1049       for (c = 0; c < 4; c++) {
1050          rgba[c][j] = out[c];
1051       }
1052    }
1053 }
1054
1055
1056 static void
1057 img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
1058                      const float s[QUAD_SIZE],
1059                      const float t[QUAD_SIZE],
1060                      const float p[QUAD_SIZE],
1061                      const float lodbias[QUAD_SIZE],
1062                      float rgba[NUM_CHANNELS][QUAD_SIZE])
1063 {
1064    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1065    const struct pipe_texture *texture = samp->texture;
1066    unsigned level0, j;
1067    int width;
1068    int x0[4], x1[4];
1069    float xw[4]; /* weights */
1070    union tex_tile_address addr;
1071
1072    level0 = samp->level;
1073    width = u_minify(texture->width0, level0);
1074
1075    assert(width > 0);
1076
1077    addr.value = 0;
1078    addr.bits.level = samp->level;
1079
1080    samp->linear_texcoord_s(s, width, x0, x1, xw);
1081
1082    for (j = 0; j < QUAD_SIZE; j++) {
1083       const float *tx0 = get_texel_2d(samp, addr, x0[j], 0);
1084       const float *tx1 = get_texel_2d(samp, addr, x1[j], 0);
1085       int c;
1086
1087       /* interpolate R, G, B, A */
1088       for (c = 0; c < 4; c++) {
1089          rgba[c][j] = lerp(xw[j], tx0[c], tx1[c]);
1090       }
1091    }
1092 }
1093
1094
1095 static void
1096 img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
1097                      const float s[QUAD_SIZE],
1098                      const float t[QUAD_SIZE],
1099                      const float p[QUAD_SIZE],
1100                      const float lodbias[QUAD_SIZE],
1101                      float rgba[NUM_CHANNELS][QUAD_SIZE])
1102 {
1103    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1104    const struct pipe_texture *texture = samp->texture;
1105    unsigned level0, j;
1106    int width, height;
1107    int x0[4], y0[4], x1[4], y1[4];
1108    float xw[4], yw[4]; /* weights */
1109    union tex_tile_address addr;
1110
1111    level0 = samp->level;
1112    width = u_minify(texture->width0, level0);
1113    height = u_minify(texture->height0, level0);
1114
1115    assert(width > 0);
1116    assert(height > 0);
1117
1118    addr.value = 0;
1119    addr.bits.level = samp->level;
1120
1121    samp->linear_texcoord_s(s, width,  x0, x1, xw);
1122    samp->linear_texcoord_t(t, height, y0, y1, yw);
1123
1124    for (j = 0; j < QUAD_SIZE; j++) {
1125       const float *tx0 = get_texel_2d(samp, addr, x0[j], y0[j]);
1126       const float *tx1 = get_texel_2d(samp, addr, x1[j], y0[j]);
1127       const float *tx2 = get_texel_2d(samp, addr, x0[j], y1[j]);
1128       const float *tx3 = get_texel_2d(samp, addr, x1[j], y1[j]);
1129       int c;
1130
1131       /* interpolate R, G, B, A */
1132       for (c = 0; c < 4; c++) {
1133          rgba[c][j] = lerp_2d(xw[j], yw[j],
1134                               tx0[c], tx1[c],
1135                               tx2[c], tx3[c]);
1136       }
1137    }
1138 }
1139
1140
1141 static void
1142 img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
1143                        const float s[QUAD_SIZE],
1144                        const float t[QUAD_SIZE],
1145                        const float p[QUAD_SIZE],
1146                        const float lodbias[QUAD_SIZE],
1147                        float rgba[NUM_CHANNELS][QUAD_SIZE])
1148 {
1149    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1150    const struct pipe_texture *texture = samp->texture;
1151    const unsigned *faces = samp->faces; /* zero when not cube-mapping */
1152    unsigned level0, j;
1153    int width, height;
1154    int x0[4], y0[4], x1[4], y1[4];
1155    float xw[4], yw[4]; /* weights */
1156    union tex_tile_address addr;
1157
1158    level0 = samp->level;
1159    width = u_minify(texture->width0, level0);
1160    height = u_minify(texture->height0, level0);
1161
1162    assert(width > 0);
1163    assert(height > 0);
1164
1165    addr.value = 0;
1166    addr.bits.level = samp->level;
1167
1168    samp->linear_texcoord_s(s, width,  x0, x1, xw);
1169    samp->linear_texcoord_t(t, height, y0, y1, yw);
1170
1171    for (j = 0; j < QUAD_SIZE; j++) {
1172       union tex_tile_address addrj = face(addr, faces[j]);
1173       const float *tx0 = get_texel_2d(samp, addrj, x0[j], y0[j]);
1174       const float *tx1 = get_texel_2d(samp, addrj, x1[j], y0[j]);
1175       const float *tx2 = get_texel_2d(samp, addrj, x0[j], y1[j]);
1176       const float *tx3 = get_texel_2d(samp, addrj, x1[j], y1[j]);
1177       int c;
1178
1179       /* interpolate R, G, B, A */
1180       for (c = 0; c < 4; c++) {
1181          rgba[c][j] = lerp_2d(xw[j], yw[j],
1182                               tx0[c], tx1[c],
1183                               tx2[c], tx3[c]);
1184       }
1185    }
1186 }
1187
1188
1189 static void
1190 img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
1191                      const float s[QUAD_SIZE],
1192                      const float t[QUAD_SIZE],
1193                      const float p[QUAD_SIZE],
1194                      const float lodbias[QUAD_SIZE],
1195                      float rgba[NUM_CHANNELS][QUAD_SIZE])
1196 {
1197    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1198    const struct pipe_texture *texture = samp->texture;
1199    unsigned level0, j;
1200    int width, height, depth;
1201    int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
1202    float xw[4], yw[4], zw[4]; /* interpolation weights */
1203    union tex_tile_address addr;
1204
1205    level0 = samp->level;
1206    width = u_minify(texture->width0, level0);
1207    height = u_minify(texture->height0, level0);
1208    depth = u_minify(texture->depth0, level0);
1209
1210    addr.value = 0;
1211    addr.bits.level = level0;
1212
1213    assert(width > 0);
1214    assert(height > 0);
1215    assert(depth > 0);
1216
1217    samp->linear_texcoord_s(s, width,  x0, x1, xw);
1218    samp->linear_texcoord_t(t, height, y0, y1, yw);
1219    samp->linear_texcoord_p(p, depth,  z0, z1, zw);
1220
1221    for (j = 0; j < QUAD_SIZE; j++) {
1222       int c;
1223
1224       const float *tx00 = get_texel_3d(samp, addr, x0[j], y0[j], z0[j]);
1225       const float *tx01 = get_texel_3d(samp, addr, x1[j], y0[j], z0[j]);
1226       const float *tx02 = get_texel_3d(samp, addr, x0[j], y1[j], z0[j]);
1227       const float *tx03 = get_texel_3d(samp, addr, x1[j], y1[j], z0[j]);
1228
1229       const float *tx10 = get_texel_3d(samp, addr, x0[j], y0[j], z1[j]);
1230       const float *tx11 = get_texel_3d(samp, addr, x1[j], y0[j], z1[j]);
1231       const float *tx12 = get_texel_3d(samp, addr, x0[j], y1[j], z1[j]);
1232       const float *tx13 = get_texel_3d(samp, addr, x1[j], y1[j], z1[j]);
1233
1234       /* interpolate R, G, B, A */
1235       for (c = 0; c < 4; c++) {
1236          rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j],
1237                               tx00[c], tx01[c],
1238                               tx02[c], tx03[c],
1239                               tx10[c], tx11[c],
1240                               tx12[c], tx13[c]);
1241       }
1242    }
1243 }
1244
1245
1246 /* Calculate level of detail for every fragment.
1247  * Note that lambda has already been biased by global LOD bias.
1248  */
1249 static INLINE void
1250 compute_lod(const struct pipe_sampler_state *sampler,
1251             const float biased_lambda,
1252             const float lodbias[QUAD_SIZE],
1253             float lod[QUAD_SIZE])
1254 {
1255    uint i;
1256
1257    for (i = 0; i < QUAD_SIZE; i++) {
1258       lod[i] = biased_lambda + lodbias[i];
1259       lod[i] = CLAMP(lod[i], sampler->min_lod, sampler->max_lod);
1260    }
1261 }
1262
1263
1264 static void
1265 mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
1266                   const float s[QUAD_SIZE],
1267                   const float t[QUAD_SIZE],
1268                   const float p[QUAD_SIZE],
1269                   const float lodbias[QUAD_SIZE],
1270                   float rgba[NUM_CHANNELS][QUAD_SIZE])
1271 {
1272    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1273    const struct pipe_texture *texture = samp->texture;
1274    int level0;
1275    float lambda;
1276    float lod[QUAD_SIZE];
1277
1278    lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
1279
1280    compute_lod(samp->sampler, lambda, lodbias, lod);
1281
1282    /* XXX: Take into account all lod values.
1283     */
1284    lambda = lod[0];
1285    level0 = (int)lambda;
1286
1287    if (lambda < 0.0) {
1288       samp->level = 0;
1289       samp->mag_img_filter( tgsi_sampler, s, t, p, NULL, rgba );
1290    }
1291    else if (level0 >= texture->last_level) {
1292       samp->level = texture->last_level;
1293       samp->min_img_filter( tgsi_sampler, s, t, p, NULL, rgba );
1294    }
1295    else {
1296       float levelBlend = lambda - level0;
1297       float rgba0[4][4];
1298       float rgba1[4][4];
1299       int c,j;
1300
1301       samp->level = level0;
1302       samp->min_img_filter( tgsi_sampler, s, t, p, NULL, rgba0 );
1303
1304       samp->level = level0+1;
1305       samp->min_img_filter( tgsi_sampler, s, t, p, NULL, rgba1 );
1306
1307       for (j = 0; j < QUAD_SIZE; j++) {
1308          for (c = 0; c < 4; c++) {
1309             rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
1310          }
1311       }
1312    }
1313 }
1314
1315
1316 static void
1317 mip_filter_nearest(struct tgsi_sampler *tgsi_sampler,
1318                    const float s[QUAD_SIZE],
1319                    const float t[QUAD_SIZE],
1320                    const float p[QUAD_SIZE],
1321                    const float lodbias[QUAD_SIZE],
1322                    float rgba[NUM_CHANNELS][QUAD_SIZE])
1323 {
1324    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1325    const struct pipe_texture *texture = samp->texture;
1326    float lambda;
1327    float lod[QUAD_SIZE];
1328
1329    lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
1330
1331    compute_lod(samp->sampler, lambda, lodbias, lod);
1332
1333    /* XXX: Take into account all lod values.
1334     */
1335    lambda = lod[0];
1336
1337    if (lambda < 0.0) {
1338       samp->level = 0;
1339       samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1340    }
1341    else {
1342       samp->level = (int)(lambda + 0.5) ;
1343       samp->level = MIN2(samp->level, (int)texture->last_level);
1344       samp->min_img_filter( tgsi_sampler, s, t, p, NULL, rgba );
1345    }
1346
1347 #if 0
1348    printf("RGBA %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n",
1349           rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0],
1350           rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1],
1351           rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2],
1352           rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]);
1353 #endif
1354 }
1355
1356
1357 static void
1358 mip_filter_none(struct tgsi_sampler *tgsi_sampler,
1359                 const float s[QUAD_SIZE],
1360                 const float t[QUAD_SIZE],
1361                 const float p[QUAD_SIZE],
1362                 const float lodbias[QUAD_SIZE],
1363                 float rgba[NUM_CHANNELS][QUAD_SIZE])
1364 {
1365    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1366    float lambda;
1367    float lod[QUAD_SIZE];
1368
1369    lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
1370
1371    compute_lod(samp->sampler, lambda, lodbias, lod);
1372
1373    /* XXX: Take into account all lod values.
1374     */
1375    lambda = lod[0];
1376
1377    if (lambda < 0.0) {
1378       samp->mag_img_filter( tgsi_sampler, s, t, p, NULL, rgba );
1379    }
1380    else {
1381       samp->min_img_filter( tgsi_sampler, s, t, p, NULL, rgba );
1382    }
1383 }
1384
1385
1386
1387 /**
1388  * Specialized version of mip_filter_linear with hard-wired calls to
1389  * 2d lambda calculation and 2d_linear_repeat_POT img filters.
1390  */
1391 static void
1392 mip_filter_linear_2d_linear_repeat_POT(
1393    struct tgsi_sampler *tgsi_sampler,
1394    const float s[QUAD_SIZE],
1395    const float t[QUAD_SIZE],
1396    const float p[QUAD_SIZE],
1397    const float lodbias[QUAD_SIZE],
1398    float rgba[NUM_CHANNELS][QUAD_SIZE])
1399 {
1400    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1401    const struct pipe_texture *texture = samp->texture;
1402    int level0;
1403    float lambda;
1404    float lod[QUAD_SIZE];
1405
1406    lambda = compute_lambda_2d(samp, s, t, p) + samp->sampler->lod_bias;
1407
1408    compute_lod(samp->sampler, lambda, lodbias, lod);
1409
1410    /* XXX: Take into account all lod values.
1411     */
1412    lambda = lod[0];
1413    level0 = (int)lambda;
1414
1415    /* Catches both negative and large values of level0:
1416     */
1417    if ((unsigned)level0 >= texture->last_level) {
1418       if (level0 < 0)
1419          samp->level = 0;
1420       else
1421          samp->level = texture->last_level;
1422
1423       img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, NULL, rgba );
1424    }
1425    else {
1426       float levelBlend = lambda - level0;
1427       float rgba0[4][4];
1428       float rgba1[4][4];
1429       int c,j;
1430
1431       samp->level = level0;
1432       img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, NULL, rgba0 );
1433
1434       samp->level = level0+1;
1435       img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, NULL, rgba1 );
1436
1437       for (j = 0; j < QUAD_SIZE; j++) {
1438          for (c = 0; c < 4; c++) {
1439             rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
1440          }
1441       }
1442    }
1443 }
1444
1445
1446
1447 /**
1448  * Do shadow/depth comparisons.
1449  */
1450 static void
1451 sample_compare(struct tgsi_sampler *tgsi_sampler,
1452                const float s[QUAD_SIZE],
1453                const float t[QUAD_SIZE],
1454                const float p[QUAD_SIZE],
1455                const float lodbias[QUAD_SIZE],
1456                float rgba[NUM_CHANNELS][QUAD_SIZE])
1457 {
1458    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1459    const struct pipe_sampler_state *sampler = samp->sampler;
1460    int j, k0, k1, k2, k3;
1461    float val;
1462
1463    samp->mip_filter( tgsi_sampler, s, t, p, lodbias, rgba );
1464
1465    /**
1466     * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
1467     * When we sampled the depth texture, the depth value was put into all
1468     * RGBA channels.  We look at the red channel here.
1469     */
1470
1471    /* compare four texcoords vs. four texture samples */
1472    switch (sampler->compare_func) {
1473    case PIPE_FUNC_LESS:
1474       k0 = p[0] < rgba[0][0];
1475       k1 = p[1] < rgba[0][1];
1476       k2 = p[2] < rgba[0][2];
1477       k3 = p[3] < rgba[0][3];
1478       break;
1479    case PIPE_FUNC_LEQUAL:
1480       k0 = p[0] <= rgba[0][0];
1481       k1 = p[1] <= rgba[0][1];
1482       k2 = p[2] <= rgba[0][2];
1483       k3 = p[3] <= rgba[0][3];
1484       break;
1485    case PIPE_FUNC_GREATER:
1486       k0 = p[0] > rgba[0][0];
1487       k1 = p[1] > rgba[0][1];
1488       k2 = p[2] > rgba[0][2];
1489       k3 = p[3] > rgba[0][3];
1490       break;
1491    case PIPE_FUNC_GEQUAL:
1492       k0 = p[0] >= rgba[0][0];
1493       k1 = p[1] >= rgba[0][1];
1494       k2 = p[2] >= rgba[0][2];
1495       k3 = p[3] >= rgba[0][3];
1496       break;
1497    case PIPE_FUNC_EQUAL:
1498       k0 = p[0] == rgba[0][0];
1499       k1 = p[1] == rgba[0][1];
1500       k2 = p[2] == rgba[0][2];
1501       k3 = p[3] == rgba[0][3];
1502       break;
1503    case PIPE_FUNC_NOTEQUAL:
1504       k0 = p[0] != rgba[0][0];
1505       k1 = p[1] != rgba[0][1];
1506       k2 = p[2] != rgba[0][2];
1507       k3 = p[3] != rgba[0][3];
1508       break;
1509    case PIPE_FUNC_ALWAYS:
1510       k0 = k1 = k2 = k3 = 1;
1511       break;
1512    case PIPE_FUNC_NEVER:
1513       k0 = k1 = k2 = k3 = 0;
1514       break;
1515    default:
1516       k0 = k1 = k2 = k3 = 0;
1517       assert(0);
1518       break;
1519    }
1520
1521    /* convert four pass/fail values to an intensity in [0,1] */
1522    val = 0.25F * (k0 + k1 + k2 + k3);
1523
1524    /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
1525    for (j = 0; j < 4; j++) {
1526       rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
1527       rgba[3][j] = 1.0F;
1528    }
1529 }
1530
1531
1532 /**
1533  * Compute which cube face is referenced by each texcoord and put that
1534  * info into the sampler faces[] array.  Then sample the cube faces
1535  */
1536 static void
1537 sample_cube(struct tgsi_sampler *tgsi_sampler,
1538             const float s[QUAD_SIZE],
1539             const float t[QUAD_SIZE],
1540             const float p[QUAD_SIZE],
1541             const float lodbias[QUAD_SIZE],
1542             float rgba[NUM_CHANNELS][QUAD_SIZE])
1543 {
1544    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1545    unsigned j;
1546    float ssss[4], tttt[4];
1547
1548    /*
1549      major axis
1550      direction     target                             sc     tc    ma
1551      ----------    -------------------------------    ---    ---   ---
1552      +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
1553      -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
1554      +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
1555      -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
1556      +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
1557      -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
1558    */
1559    for (j = 0; j < QUAD_SIZE; j++) {
1560       float rx = s[j];
1561       float ry = t[j];
1562       float rz = p[j];
1563       const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
1564       unsigned face;
1565       float sc, tc, ma;
1566
1567       if (arx >= ary && arx >= arz) {
1568          if (rx >= 0.0F) {
1569             face = PIPE_TEX_FACE_POS_X;
1570             sc = -rz;
1571             tc = -ry;
1572             ma = arx;
1573          }
1574          else {
1575             face = PIPE_TEX_FACE_NEG_X;
1576             sc = rz;
1577             tc = -ry;
1578             ma = arx;
1579          }
1580       }
1581       else if (ary >= arx && ary >= arz) {
1582          if (ry >= 0.0F) {
1583             face = PIPE_TEX_FACE_POS_Y;
1584             sc = rx;
1585             tc = rz;
1586             ma = ary;
1587          }
1588          else {
1589             face = PIPE_TEX_FACE_NEG_Y;
1590             sc = rx;
1591             tc = -rz;
1592             ma = ary;
1593          }
1594       }
1595       else {
1596          if (rz > 0.0F) {
1597             face = PIPE_TEX_FACE_POS_Z;
1598             sc = rx;
1599             tc = -ry;
1600             ma = arz;
1601          }
1602          else {
1603             face = PIPE_TEX_FACE_NEG_Z;
1604             sc = -rx;
1605             tc = -ry;
1606             ma = arz;
1607          }
1608       }
1609
1610       {
1611          const float ima = 1.0 / ma;
1612          ssss[j] = ( sc * ima + 1.0F ) * 0.5F;
1613          tttt[j] = ( tc * ima + 1.0F ) * 0.5F;
1614          samp->faces[j] = face;
1615       }
1616    }
1617
1618    /* In our little pipeline, the compare stage is next.  If compare
1619     * is not active, this will point somewhere deeper into the
1620     * pipeline, eg. to mip_filter or even img_filter.
1621     */
1622    samp->compare(tgsi_sampler, ssss, tttt, NULL, lodbias, rgba);
1623 }
1624
1625
1626
1627 static wrap_nearest_func
1628 get_nearest_unorm_wrap(unsigned mode)
1629 {
1630    switch (mode) {
1631    case PIPE_TEX_WRAP_CLAMP:
1632       return wrap_nearest_unorm_clamp;
1633    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1634    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1635       return wrap_nearest_unorm_clamp_to_border;
1636    default:
1637       assert(0);
1638       return wrap_nearest_unorm_clamp;
1639    }
1640 }
1641
1642
1643 static wrap_nearest_func
1644 get_nearest_wrap(unsigned mode)
1645 {
1646    switch (mode) {
1647    case PIPE_TEX_WRAP_REPEAT:
1648       return wrap_nearest_repeat;
1649    case PIPE_TEX_WRAP_CLAMP:
1650       return wrap_nearest_clamp;
1651    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1652       return wrap_nearest_clamp_to_edge;
1653    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1654       return wrap_nearest_clamp_to_border;
1655    case PIPE_TEX_WRAP_MIRROR_REPEAT:
1656       return wrap_nearest_mirror_repeat;
1657    case PIPE_TEX_WRAP_MIRROR_CLAMP:
1658       return wrap_nearest_mirror_clamp;
1659    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1660       return wrap_nearest_mirror_clamp_to_edge;
1661    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1662       return wrap_nearest_mirror_clamp_to_border;
1663    default:
1664       assert(0);
1665       return wrap_nearest_repeat;
1666    }
1667 }
1668
1669
1670 static wrap_linear_func
1671 get_linear_unorm_wrap(unsigned mode)
1672 {
1673    switch (mode) {
1674    case PIPE_TEX_WRAP_CLAMP:
1675       return wrap_linear_unorm_clamp;
1676    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1677    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1678       return wrap_linear_unorm_clamp_to_border;
1679    default:
1680       assert(0);
1681       return wrap_linear_unorm_clamp;
1682    }
1683 }
1684
1685
1686 static wrap_linear_func
1687 get_linear_wrap(unsigned mode)
1688 {
1689    switch (mode) {
1690    case PIPE_TEX_WRAP_REPEAT:
1691       return wrap_linear_repeat;
1692    case PIPE_TEX_WRAP_CLAMP:
1693       return wrap_linear_clamp;
1694    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1695       return wrap_linear_clamp_to_edge;
1696    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1697       return wrap_linear_clamp_to_border;
1698    case PIPE_TEX_WRAP_MIRROR_REPEAT:
1699       return wrap_linear_mirror_repeat;
1700    case PIPE_TEX_WRAP_MIRROR_CLAMP:
1701       return wrap_linear_mirror_clamp;
1702    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1703       return wrap_linear_mirror_clamp_to_edge;
1704    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1705       return wrap_linear_mirror_clamp_to_border;
1706    default:
1707       assert(0);
1708       return wrap_linear_repeat;
1709    }
1710 }
1711
1712
1713 static compute_lambda_func
1714 get_lambda_func(const union sp_sampler_key key)
1715 {
1716    if (key.bits.processor == TGSI_PROCESSOR_VERTEX)
1717       return compute_lambda_vert;
1718
1719    switch (key.bits.target) {
1720    case PIPE_TEXTURE_1D:
1721       return compute_lambda_1d;
1722    case PIPE_TEXTURE_2D:
1723    case PIPE_TEXTURE_CUBE:
1724       return compute_lambda_2d;
1725    case PIPE_TEXTURE_3D:
1726       return compute_lambda_3d;
1727    default:
1728       assert(0);
1729       return compute_lambda_1d;
1730    }
1731 }
1732
1733
1734 static filter_func
1735 get_img_filter(const union sp_sampler_key key,
1736                unsigned filter,
1737                const struct pipe_sampler_state *sampler)
1738 {
1739    switch (key.bits.target) {
1740    case PIPE_TEXTURE_1D:
1741       if (filter == PIPE_TEX_FILTER_NEAREST)
1742          return img_filter_1d_nearest;
1743       else
1744          return img_filter_1d_linear;
1745       break;
1746    case PIPE_TEXTURE_2D:
1747       /* Try for fast path:
1748        */
1749       if (key.bits.is_pot &&
1750           sampler->wrap_s == sampler->wrap_t &&
1751           sampler->normalized_coords)
1752       {
1753          switch (sampler->wrap_s) {
1754          case PIPE_TEX_WRAP_REPEAT:
1755             switch (filter) {
1756             case PIPE_TEX_FILTER_NEAREST:
1757                return img_filter_2d_nearest_repeat_POT;
1758             case PIPE_TEX_FILTER_LINEAR:
1759                return img_filter_2d_linear_repeat_POT;
1760             default:
1761                break;
1762             }
1763             break;
1764          case PIPE_TEX_WRAP_CLAMP:
1765             switch (filter) {
1766             case PIPE_TEX_FILTER_NEAREST:
1767                return img_filter_2d_nearest_clamp_POT;
1768             default:
1769                break;
1770             }
1771          }
1772       }
1773       /* Otherwise use default versions:
1774        */
1775       if (filter == PIPE_TEX_FILTER_NEAREST)
1776          return img_filter_2d_nearest;
1777       else
1778          return img_filter_2d_linear;
1779       break;
1780    case PIPE_TEXTURE_CUBE:
1781       if (filter == PIPE_TEX_FILTER_NEAREST)
1782          return img_filter_cube_nearest;
1783       else
1784          return img_filter_cube_linear;
1785       break;
1786    case PIPE_TEXTURE_3D:
1787       if (filter == PIPE_TEX_FILTER_NEAREST)
1788          return img_filter_3d_nearest;
1789       else
1790          return img_filter_3d_linear;
1791       break;
1792    default:
1793       assert(0);
1794       return img_filter_1d_nearest;
1795    }
1796 }
1797
1798
1799 /**
1800  * Bind the given texture object and texture cache to the sampler varient.
1801  */
1802 void
1803 sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp,
1804                                  struct softpipe_tex_tile_cache *tex_cache,
1805                                  const struct pipe_texture *texture )
1806 {
1807    const struct pipe_sampler_state *sampler = samp->sampler;
1808
1809    samp->texture = texture;
1810    samp->cache = tex_cache;
1811    samp->xpot = util_unsigned_logbase2( texture->width0 );
1812    samp->ypot = util_unsigned_logbase2( texture->height0 );
1813    samp->level = CLAMP((int) sampler->min_lod, 0, (int) texture->last_level);
1814 }
1815
1816
/**
 * Release a sampler variant.  Only the variant structure itself is freed;
 * the sampler state, texture and cache it points to are left alone.
 */
void
sp_sampler_varient_destroy( struct sp_sampler_varient *samp )
{
   FREE( samp );
}
1822
1823
1824 /**
1825  * Create a sampler varient for a given set of non-orthogonal state.
1826  */
1827 struct sp_sampler_varient *
1828 sp_create_sampler_varient( const struct pipe_sampler_state *sampler,
1829                            const union sp_sampler_key key )
1830 {
1831    struct sp_sampler_varient *samp = CALLOC_STRUCT(sp_sampler_varient);
1832    if (!samp)
1833       return NULL;
1834
1835    samp->sampler = sampler;
1836    samp->key = key;
1837
1838    /* Note that (for instance) linear_texcoord_s and
1839     * nearest_texcoord_s may be active at the same time, if the
1840     * sampler min_img_filter differs from its mag_img_filter.
1841     */
1842    if (sampler->normalized_coords) {
1843       samp->linear_texcoord_s = get_linear_wrap( sampler->wrap_s );
1844       samp->linear_texcoord_t = get_linear_wrap( sampler->wrap_t );
1845       samp->linear_texcoord_p = get_linear_wrap( sampler->wrap_r );
1846
1847       samp->nearest_texcoord_s = get_nearest_wrap( sampler->wrap_s );
1848       samp->nearest_texcoord_t = get_nearest_wrap( sampler->wrap_t );
1849       samp->nearest_texcoord_p = get_nearest_wrap( sampler->wrap_r );
1850    }
1851    else {
1852       samp->linear_texcoord_s = get_linear_unorm_wrap( sampler->wrap_s );
1853       samp->linear_texcoord_t = get_linear_unorm_wrap( sampler->wrap_t );
1854       samp->linear_texcoord_p = get_linear_unorm_wrap( sampler->wrap_r );
1855
1856       samp->nearest_texcoord_s = get_nearest_unorm_wrap( sampler->wrap_s );
1857       samp->nearest_texcoord_t = get_nearest_unorm_wrap( sampler->wrap_t );
1858       samp->nearest_texcoord_p = get_nearest_unorm_wrap( sampler->wrap_r );
1859    }
1860
1861    samp->compute_lambda = get_lambda_func( key );
1862
1863    samp->min_img_filter = get_img_filter(key, sampler->min_img_filter, sampler);
1864    samp->mag_img_filter = get_img_filter(key, sampler->mag_img_filter, sampler);
1865
1866    switch (sampler->min_mip_filter) {
1867    case PIPE_TEX_MIPFILTER_NONE:
1868       if (sampler->min_img_filter == sampler->mag_img_filter)
1869          samp->mip_filter = samp->min_img_filter;
1870       else
1871          samp->mip_filter = mip_filter_none;
1872       break;
1873
1874    case PIPE_TEX_MIPFILTER_NEAREST:
1875       samp->mip_filter = mip_filter_nearest;
1876       break;
1877
1878    case PIPE_TEX_MIPFILTER_LINEAR:
1879       if (key.bits.is_pot &&
1880           sampler->min_img_filter == sampler->mag_img_filter &&
1881           sampler->normalized_coords &&
1882           sampler->wrap_s == PIPE_TEX_WRAP_REPEAT &&
1883           sampler->wrap_t == PIPE_TEX_WRAP_REPEAT &&
1884           sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR)
1885       {
1886          samp->mip_filter = mip_filter_linear_2d_linear_repeat_POT;
1887       }
1888       else
1889       {
1890          samp->mip_filter = mip_filter_linear;
1891       }
1892       break;
1893    }
1894
1895    if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
1896       samp->compare = sample_compare;
1897    }
1898    else {
1899       /* Skip compare operation by promoting the mip_filter function
1900        * pointer:
1901        */
1902       samp->compare = samp->mip_filter;
1903    }
1904
1905    if (key.bits.target == PIPE_TEXTURE_CUBE) {
1906       samp->base.get_samples = sample_cube;
1907    }
1908    else {
1909       samp->faces[0] = 0;
1910       samp->faces[1] = 0;
1911       samp->faces[2] = 0;
1912       samp->faces[3] = 0;
1913
1914       /* Skip cube face determination by promoting the compare
1915        * function pointer:
1916        */
1917       samp->base.get_samples = samp->compare;
1918    }
1919
1920    return samp;
1921 }