src/gallium/drivers/softpipe/sp_tex_sample.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 VMware, Inc.
   4  * All Rights Reserved.
   5  * Copyright 2008-2010 VMware, Inc.  All rights reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the
   9  * "Software"), to deal in the Software without restriction, including
  10  * without limitation the rights to use, copy, modify, merge, publish,
  11  * distribute, sub license, and/or sell copies of the Software, and to
  12  * permit persons to whom the Software is furnished to do so, subject to
  13  * the following conditions:
  14  *
  15  * The above copyright notice and this permission notice (including the
  16  * next paragraph) shall be included in all copies or substantial portions
  17  * of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26  *
  27  **************************************************************************/
  28
  29 /**
  30  * Texture sampling
  31  *
  32  * Authors:
  33  *   Brian Paul
  34  *   Keith Whitwell
  35  */
  36
  37 #include "pipe/p_context.h"
  38 #include "pipe/p_defines.h"
  39 #include "pipe/p_shader_tokens.h"
  40 #include "util/u_math.h"
  41 #include "util/u_format.h"
  42 #include "util/u_memory.h"
  43 #include "util/u_inlines.h"
  44 #include "sp_quad.h"   /* only for #define QUAD_* tokens */
  45 #include "sp_tex_sample.h"
  46 #include "sp_texture.h"
  47 #include "sp_tex_tile_cache.h"
  48
  49
  50 /** Set to one to help debug texture sampling */
  51 #define DEBUG_TEX 0
  52
  53
  54 /*
  55  * Return fractional part of 'f'.  Used for computing interpolation weights.
  56  * Need to be careful with negative values.
  57  * Note, if this function isn't perfect you'll sometimes see 1-pixel bands
  58  * of improperly weighted linear-filtered textures.
  59  * The tests/texwrap.c demo is a good test.
  60  */
  61 static INLINE float
  62 frac(float f)
  63 {
  64    return f - floorf(f);
  65 }
  66
  67
  68
/**
 * Linear interpolation: returns v0 when a == 0 and v1 when a == 1.
 */
  72 static INLINE float
  73 lerp(float a, float v0, float v1)
  74 {
  75    return v0 + a * (v1 - v0);
  76 }
  77
  78
  79 /**
  80  * Do 2D/bilinear interpolation of float values.
  81  * v00, v10, v01 and v11 are typically four texture samples in a square/box.
  82  * a and b are the horizontal and vertical interpolants.
  83  * It's important that this function is inlined when compiled with
  84  * optimization!  If we find that's not true on some systems, convert
  85  * to a macro.
  86  */
  87 static INLINE float
  88 lerp_2d(float a, float b,
  89         float v00, float v10, float v01, float v11)
  90 {
  91    const float temp0 = lerp(a, v00, v10);
  92    const float temp1 = lerp(a, v01, v11);
  93    return lerp(b, temp0, temp1);
  94 }
  95
  96
  97 /**
  98  * As above, but 3D interpolation of 8 values.
  99  */
 100 static INLINE float
 101 lerp_3d(float a, float b, float c,
 102         float v000, float v100, float v010, float v110,
 103         float v001, float v101, float v011, float v111)
 104 {
 105    const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
 106    const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
 107    return lerp(c, temp0, temp1);
 108 }
 109
 110
 111
 112 /**
 113  * Compute coord % size for repeat wrap modes.
 114  * Note that if coord is negative, coord % size doesn't give the right
 115  * value.  To avoid that problem we add a large multiple of the size
 116  * (rather than using a conditional).
 117  */
 118 static INLINE int
 119 repeat(int coord, unsigned size)
 120 {
 121    return (coord + size * 1024) % size;
 122 }
 123
 124
/**
 * Nearest-filter wrap functions for normalized texcoords.
 * Each wrap_nearest_*() function below implements one wrap mode: it
 * applies that mode to a single texcoord (S or T or P) and returns the
 * integer texel index.
 * \param s  the incoming texcoord
 * \param size  the texture image size
 * \param offset  integer texel offset added to the computed index
 * \param icoord  returns the integer texel index
 */
 133 static void
 134 wrap_nearest_repeat(float s, unsigned size, int offset, int *icoord)
 135 {
 136    /* s limited to [0,1) */
 137    /* i limited to [0,size-1] */
 138    int i = util_ifloor(s * size);
 139    *icoord = repeat(i + offset, size);
 140 }
 141
 142
 143 static void
 144 wrap_nearest_clamp(float s, unsigned size, int offset, int *icoord)
 145 {
 146    /* s limited to [0,1] */
 147    /* i limited to [0,size-1] */
 148    if (s <= 0.0F)
 149       *icoord = 0;
 150    else if (s >= 1.0F)
 151       *icoord = size - 1;
 152    else
 153       *icoord = util_ifloor(s * size);
 154    if (offset)
 155       *icoord = CLAMP(*icoord + offset, 0, size - 1);
 156 }
 157
 158
 159 static void
 160 wrap_nearest_clamp_to_edge(float s, unsigned size, int offset, int *icoord)
 161 {
 162    /* s limited to [min,max] */
 163    /* i limited to [0, size-1] */
 164    const float min = 1.0F / (2.0F * size);
 165    const float max = 1.0F - min;
 166
 167    if (s < min)
 168       *icoord = 0;
 169    else if (s > max)
 170       *icoord = size - 1;
 171    else
 172       *icoord = util_ifloor(s * size);
 173    if (offset)
 174       *icoord = CLAMP(*icoord + offset, 0, size - 1);
 175 }
 176
 177
 178 static void
 179 wrap_nearest_clamp_to_border(float s, unsigned size, int offset, int *icoord)
 180 {
 181    /* s limited to [min,max] */
 182    /* i limited to [-1, size] */
 183    const float min = -1.0F / (2.0F * size);
 184    const float max = 1.0F - min;
 185    if (s <= min)
 186       *icoord = -1;
 187    else if (s >= max)
 188       *icoord = size;
 189    else
 190       *icoord = util_ifloor(s * size);
 191    if (offset)
 192       *icoord = CLAMP(*icoord + offset, 0, size - 1);
 193 }
 194
 195
 196 static void
 197 wrap_nearest_mirror_repeat(float s, unsigned size, int offset, int *icoord)
 198 {
 199    const float min = 1.0F / (2.0F * size);
 200    const float max = 1.0F - min;
 201    const int flr = util_ifloor(s);
 202    float u = frac(s);
 203    if (flr & 1)
 204       u = 1.0F - u;
 205    if (u < min)
 206       *icoord = 0;
 207    else if (u > max)
 208       *icoord = size - 1;
 209    else
 210       *icoord = util_ifloor(u * size);
 211    if (offset)
 212       *icoord = CLAMP(*icoord + offset, 0, size - 1);
 213 }
 214
 215
 216 static void
 217 wrap_nearest_mirror_clamp(float s, unsigned size, int offset, int *icoord)
 218 {
 219    /* s limited to [0,1] */
 220    /* i limited to [0,size-1] */
 221    const float u = fabsf(s);
 222    if (u <= 0.0F)
 223       *icoord = 0;
 224    else if (u >= 1.0F)
 225       *icoord = size - 1;
 226    else
 227       *icoord = util_ifloor(u * size);
 228    if (offset)
 229       *icoord = CLAMP(*icoord + offset, 0, size - 1);
 230 }
 231
 232
 233 static void
 234 wrap_nearest_mirror_clamp_to_edge(float s, unsigned size, int offset, int *icoord)
 235 {
 236    /* s limited to [min,max] */
 237    /* i limited to [0, size-1] */
 238    const float min = 1.0F / (2.0F * size);
 239    const float max = 1.0F - min;
 240    const float u = fabsf(s);
 241    if (u < min)
 242       *icoord = 0;
 243    else if (u > max)
 244       *icoord = size - 1;
 245    else
 246       *icoord = util_ifloor(u * size);
 247    if (offset)
 248       *icoord = CLAMP(*icoord + offset, 0, size - 1);
 249 }
 250
 251
 252 static void
 253 wrap_nearest_mirror_clamp_to_border(float s, unsigned size, int offset, int *icoord)
 254 {
 255    /* s limited to [min,max] */
 256    /* i limited to [0, size-1] */
 257    const float min = -1.0F / (2.0F * size);
 258    const float max = 1.0F - min;
 259    const float u = fabsf(s);
 260    if (u < min)
 261       *icoord = -1;
 262    else if (u > max)
 263       *icoord = size;
 264    else
 265       *icoord = util_ifloor(u * size);
 266    if (offset)
 267       *icoord = CLAMP(*icoord + offset, 0, size - 1);
 268 }
 269
 270
/**
 * Linear-filter wrap functions for normalized texcoords.
 * Each wrap_linear_*() function below implements one wrap mode: it
 * computes the two texel locations to blend and the blend weight.
 * \param s  the texcoord
 * \param size  the texture image size
 * \param offset  integer texel offset requested for the lookup
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index (usually icoord0 + 1)
 * \param w  returns blend factor/weight between texture indices
 */
 281 static void
 282 wrap_linear_repeat(float s, unsigned size, int offset,
 283                    int *icoord0, int *icoord1, float *w)
 284 {
 285    float u = s * size - 0.5F;
 286    *icoord0 = repeat(util_ifloor(u) + offset, size);
 287    *icoord1 = repeat(*icoord0 + 1, size);
 288    *w = frac(u);
 289 }
 290
 291
 292 static void
 293 wrap_linear_clamp(float s, unsigned size, int offset,
 294                   int *icoord0, int *icoord1, float *w)
 295 {
 296    float u = CLAMP(s, 0.0F, 1.0F);
 297    u = u * size - 0.5f;
 298    *icoord0 = util_ifloor(u);
 299    *icoord1 = *icoord0 + 1;
 300    if (offset) {
 301       *icoord0 = CLAMP(*icoord0 + offset, 0, size - 1);
 302       *icoord1 = CLAMP(*icoord1 + offset, 0, size - 1);
 303    }
 304    *w = frac(u);
 305 }
 306
 307
 308 static void
 309 wrap_linear_clamp_to_edge(float s, unsigned size, int offset,
 310                           int *icoord0, int *icoord1, float *w)
 311 {
 312    float u = CLAMP(s, 0.0F, 1.0F);
 313    u = u * size - 0.5f;
 314    *icoord0 = util_ifloor(u);
 315    *icoord1 = *icoord0 + 1;
 316    if (*icoord0 < 0)
 317       *icoord0 = 0;
 318    if (*icoord1 >= (int) size)
 319       *icoord1 = size - 1;
 320    if (offset) {
 321       *icoord0 = CLAMP(*icoord0 + offset, 0, size - 1);
 322       *icoord1 = CLAMP(*icoord1 + offset, 0, size - 1);
 323    }
 324    *w = frac(u);
 325 }
 326
 327
 328 static void
 329 wrap_linear_clamp_to_border(float s, unsigned size, int offset,
 330                             int *icoord0, int *icoord1, float *w)
 331 {
 332    const float min = -1.0F / (2.0F * size);
 333    const float max = 1.0F - min;
 334    float u = CLAMP(s, min, max);
 335    u = u * size - 0.5f;
 336    *icoord0 = util_ifloor(u);
 337    *icoord1 = *icoord0 + 1;
 338    *w = frac(u);
 339 }
 340
 341
 342 static void
 343 wrap_linear_mirror_repeat(float s, unsigned size, int offset,
 344                           int *icoord0, int *icoord1, float *w)
 345 {
 346    const int flr = util_ifloor(s);
 347    float u = frac(s);
 348    if (flr & 1)
 349       u = 1.0F - u;
 350    u = u * size - 0.5F;
 351    *icoord0 = util_ifloor(u);
 352    *icoord1 = *icoord0 + 1;
 353    if (*icoord0 < 0)
 354       *icoord0 = 0;
 355    if (*icoord1 >= (int) size)
 356       *icoord1 = size - 1;
 357    *w = frac(u);
 358 }
 359
 360
 361 static void
 362 wrap_linear_mirror_clamp(float s, unsigned size, int offset,
 363                          int *icoord0, int *icoord1, float *w)
 364 {
 365    float u = fabsf(s);
 366    if (u >= 1.0F)
 367       u = (float) size;
 368    else
 369       u *= size;
 370    u -= 0.5F;
 371    *icoord0 = util_ifloor(u);
 372    *icoord1 = *icoord0 + 1;
 373    *w = frac(u);
 374 }
 375
 376
 377 static void
 378 wrap_linear_mirror_clamp_to_edge(float s, unsigned size, int offset,
 379                                  int *icoord0, int *icoord1, float *w)
 380 {
 381    float u = fabsf(s);
 382    if (u >= 1.0F)
 383       u = (float) size;
 384    else
 385       u *= size;
 386    u -= 0.5F;
 387    *icoord0 = util_ifloor(u);
 388    *icoord1 = *icoord0 + 1;
 389    if (*icoord0 < 0)
 390       *icoord0 = 0;
 391    if (*icoord1 >= (int) size)
 392       *icoord1 = size - 1;
 393    *w = frac(u);
 394 }
 395
 396
 397 static void
 398 wrap_linear_mirror_clamp_to_border(float s, unsigned size, int offset,
 399                                    int *icoord0, int *icoord1, float *w)
 400 {
 401    const float min = -1.0F / (2.0F * size);
 402    const float max = 1.0F - min;
 403    float u = fabsf(s);
 404    if (u <= min)
 405       u = min * size;
 406    else if (u >= max)
 407       u = max * size;
 408    else
 409       u *= size;
 410    u -= 0.5F;
 411    *icoord0 = util_ifloor(u);
 412    *icoord1 = *icoord0 + 1;
 413    *w = frac(u);
 414 }
 415
 416
 417 /**
 418  * PIPE_TEX_WRAP_CLAMP for nearest sampling, unnormalized coords.
 419  */
 420 static void
 421 wrap_nearest_unorm_clamp(float s, unsigned size, int offset, int *icoord)
 422 {
 423    int i = util_ifloor(s);
 424    *icoord = CLAMP(i + offset, 0, (int) size-1);
 425 }
 426
 427
 428 /**
 429  * PIPE_TEX_WRAP_CLAMP_TO_BORDER for nearest sampling, unnormalized coords.
 430  */
 431 static void
 432 wrap_nearest_unorm_clamp_to_border(float s, unsigned size, int offset, int *icoord)
 433 {
 434    *icoord = util_ifloor( CLAMP(s + offset, -0.5F, (float) size + 0.5F) );
 435 }
 436
 437
 438 /**
 439  * PIPE_TEX_WRAP_CLAMP_TO_EDGE for nearest sampling, unnormalized coords.
 440  */
 441 static void
 442 wrap_nearest_unorm_clamp_to_edge(float s, unsigned size, int offset, int *icoord)
 443 {
 444    *icoord = util_ifloor( CLAMP(s + offset, 0.5F, (float) size - 0.5F) );
 445 }
 446
 447
 448 /**
 449  * PIPE_TEX_WRAP_CLAMP for linear sampling, unnormalized coords.
 450  */
 451 static void
 452 wrap_linear_unorm_clamp(float s, unsigned size, int offset,
 453                         int *icoord0, int *icoord1, float *w)
 454 {
 455    /* Not exactly what the spec says, but it matches NVIDIA output */
 456    float u = CLAMP(s + offset - 0.5F, 0.0f, (float) size - 1.0f);
 457    *icoord0 = util_ifloor(u);
 458    *icoord1 = *icoord0 + 1;
 459    *w = frac(u);
 460 }
 461
 462
 463 /**
 464  * PIPE_TEX_WRAP_CLAMP_TO_BORDER for linear sampling, unnormalized coords.
 465  */
 466 static void
 467 wrap_linear_unorm_clamp_to_border(float s, unsigned size, int offset,
 468                                   int *icoord0, int *icoord1, float *w)
 469 {
 470    float u = CLAMP(s + offset, -0.5F, (float) size + 0.5F);
 471    u -= 0.5F;
 472    *icoord0 = util_ifloor(u);
 473    *icoord1 = *icoord0 + 1;
 474    if (*icoord1 > (int) size - 1)
 475       *icoord1 = size - 1;
 476    *w = frac(u);
 477 }
 478
 479
 480 /**
 481  * PIPE_TEX_WRAP_CLAMP_TO_EDGE for linear sampling, unnormalized coords.
 482  */
 483 static void
 484 wrap_linear_unorm_clamp_to_edge(float s, unsigned size, int offset,
 485                                 int *icoord0, int *icoord1, float *w)
 486 {
 487    float u = CLAMP(s + offset, +0.5F, (float) size - 0.5F);
 488    u -= 0.5F;
 489    *icoord0 = util_ifloor(u);
 490    *icoord1 = *icoord0 + 1;
 491    if (*icoord1 > (int) size - 1)
 492       *icoord1 = size - 1;
 493    *w = frac(u);
 494 }
 495
 496
 497 /**
 498  * Do coordinate to array index conversion.  For array textures.
 499  */
 500 static INLINE int
 501 coord_to_layer(float coord, unsigned first_layer, unsigned last_layer)
 502 {
 503    int c = util_ifloor(coord + 0.5F);
 504    return CLAMP(c, (int)first_layer, (int)last_layer);
 505 }
 506
 507
 508 /**
 509  * Examine the quad's texture coordinates to compute the partial
 510  * derivatives w.r.t X and Y, then compute lambda (level of detail).
 511  */
 512 static float
 513 compute_lambda_1d(const struct sp_sampler_view *sview,
 514                   const float s[TGSI_QUAD_SIZE],
 515                   const float t[TGSI_QUAD_SIZE],
 516                   const float p[TGSI_QUAD_SIZE])
 517 {
 518    const struct pipe_resource *texture = sview->base.texture;
 519    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 520    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 521    float rho = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
 522
 523    return util_fast_log2(rho);
 524 }
 525
 526
 527 static float
 528 compute_lambda_2d(const struct sp_sampler_view *sview,
 529                   const float s[TGSI_QUAD_SIZE],
 530                   const float t[TGSI_QUAD_SIZE],
 531                   const float p[TGSI_QUAD_SIZE])
 532 {
 533    const struct pipe_resource *texture = sview->base.texture;
 534    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 535    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 536    float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
 537    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
 538    float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
 539    float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
 540    float rho  = MAX2(maxx, maxy);
 541
 542    return util_fast_log2(rho);
 543 }
 544
 545
 546 static float
 547 compute_lambda_3d(const struct sp_sampler_view *sview,
 548                   const float s[TGSI_QUAD_SIZE],
 549                   const float t[TGSI_QUAD_SIZE],
 550                   const float p[TGSI_QUAD_SIZE])
 551 {
 552    const struct pipe_resource *texture = sview->base.texture;
 553    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 554    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 555    float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
 556    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
 557    float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
 558    float dpdy = fabsf(p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT]);
 559    float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
 560    float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
 561    float maxz = MAX2(dpdx, dpdy) * u_minify(texture->depth0, sview->base.u.tex.first_level);
 562    float rho;
 563
 564    rho = MAX2(maxx, maxy);
 565    rho = MAX2(rho, maxz);
 566
 567    return util_fast_log2(rho);
 568 }
 569
 570
 571 /**
 572  * Compute lambda for a vertex texture sampler.
 573  * Since there aren't derivatives to use, just return 0.
 574  */
 575 static float
 576 compute_lambda_vert(const struct sp_sampler_view *sview,
 577                     const float s[TGSI_QUAD_SIZE],
 578                     const float t[TGSI_QUAD_SIZE],
 579                     const float p[TGSI_QUAD_SIZE])
 580 {
 581    return 0.0f;
 582 }
 583
 584
 585
/**
 * Get a texel from a texture, using the texture tile cache.
 *
 * \param sp_sview  the sampler view whose tile cache is used
 * \param addr  the template tex address containing cube, z, face info.
 * \param x  the x coord of texel within 2D image
 * \param y  the y coord of texel within 2D image
 * \return pointer to the texel's color values inside the cached tile
 *
 * XXX maybe move this into sp_tex_tile_cache.c and merge with the
 * sp_get_cached_tile_tex() function.
 */
 597
 598
 599
 600
 601 static INLINE const float *
 602 get_texel_2d_no_border(const struct sp_sampler_view *sp_sview,
 603                        union tex_tile_address addr, int x, int y)
 604 {
 605    const struct softpipe_tex_cached_tile *tile;
 606    addr.bits.x = x / TEX_TILE_SIZE;
 607    addr.bits.y = y / TEX_TILE_SIZE;
 608    y %= TEX_TILE_SIZE;
 609    x %= TEX_TILE_SIZE;
 610
 611    tile = sp_get_cached_tile_tex(sp_sview->cache, addr);
 612
 613    return &tile->data.color[y][x][0];
 614 }
 615
 616
 617 static INLINE const float *
 618 get_texel_2d(const struct sp_sampler_view *sp_sview,
 619              const struct sp_sampler *sp_samp,
 620              union tex_tile_address addr, int x, int y)
 621 {
 622    const struct pipe_resource *texture = sp_sview->base.texture;
 623    unsigned level = addr.bits.level;
 624
 625    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 626        y < 0 || y >= (int) u_minify(texture->height0, level)) {
 627       return sp_samp->base.border_color.f;
 628    }
 629    else {
 630       return get_texel_2d_no_border( sp_sview, addr, x, y );
 631    }
 632 }
 633
 634
 635 /*
 636  * Here's the complete logic (HOLY CRAP) for finding next face and doing the
 637  * corresponding coord wrapping, implemented by get_next_face,
 638  * get_next_xcoord, get_next_ycoord.
 639  * Read like that (first line):
 640  * If face is +x and s coord is below zero, then
 641  * new face is +z, new s is max , new t is old t
 642  * (max is always cube size - 1).
 643  *
 644  * +x s- -> +z: s = max,   t = t
 645  * +x s+ -> -z: s = 0,     t = t
 646  * +x t- -> +y: s = max,   t = max-s
 647  * +x t+ -> -y: s = max,   t = s
 648  *
 649  * -x s- -> -z: s = max,   t = t
 650  * -x s+ -> +z: s = 0,     t = t
 651  * -x t- -> +y: s = 0,     t = s
 652  * -x t+ -> -y: s = 0,     t = max-s
 653  *
 654  * +y s- -> -x: s = t,     t = 0
 655  * +y s+ -> +x: s = max-t, t = 0
 656  * +y t- -> -z: s = max-s, t = 0
 657  * +y t+ -> +z: s = s,     t = 0
 658  *
 659  * -y s- -> -x: s = max-t, t = max
 660  * -y s+ -> +x: s = t,     t = max
 661  * -y t- -> +z: s = s,     t = max
 662  * -y t+ -> -z: s = max-s, t = max
 663
 664  * +z s- -> -x: s = max,   t = t
 665  * +z s+ -> +x: s = 0,     t = t
 666  * +z t- -> +y: s = s,     t = max
 667  * +z t+ -> -y: s = s,     t = 0
 668
 669  * -z s- -> +x: s = max,   t = t
 670  * -z s+ -> -x: s = 0,     t = t
 671  * -z t- -> +y: s = max-s, t = 0
 672  * -z t+ -> -y: s = max-s, t = max
 673  */
 674
 675
 676 /*
 677  * seamless cubemap neighbour array.
 678  * this array is used to find the adjacent face in each of 4 directions,
 679  * left, right, up, down. (or -x, +x, -y, +y).
 680  */
/*
 * Each row holds the neighbouring face in the four fall-off directions,
 * in the order { left (-x), right (+x), up (-y), down (+y) }.  The rows
 * are labelled with the face they belong to; get_next_face() indexes
 * this table as face_array[face][idx].
 */
static const unsigned face_array[PIPE_TEX_FACE_MAX][4] = {
   /* The +X and -X rows have their Z neighbours swapped and share the
    * same Y neighbours.
    */
   /* PIPE_TEX_FACE_POS_X */
   { PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z,
     PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },
   /* PIPE_TEX_FACE_NEG_X */
   { PIPE_TEX_FACE_NEG_Z, PIPE_TEX_FACE_POS_Z,
     PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },

   /* The +Y and -Y rows share the same X neighbours and have their Z
    * neighbours swapped.
    */
   /* PIPE_TEX_FACE_POS_Y */
   { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
     PIPE_TEX_FACE_NEG_Z, PIPE_TEX_FACE_POS_Z },

   /* PIPE_TEX_FACE_NEG_Y */
   { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
     PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z },

   /* The +Z and -Z rows have their X neighbours swapped and share the
    * same Y neighbours.
    */
   /* PIPE_TEX_FACE_POS_Z */
   { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
     PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },

   /* PIPE_TEX_FACE_NEG_Z */
   { PIPE_TEX_FACE_POS_X, PIPE_TEX_FACE_NEG_X,
     PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y }
};
 708
 709 static INLINE unsigned
 710 get_next_face(unsigned face, int idx)
 711 {
 712    return face_array[face][idx];
 713 }
 714
 715 /*
 716  * return a new xcoord based on old face, old coords, cube size
 717  * and fall_off_index (0 for x-, 1 for x+, 2 for y-, 3 for y+)
 718  */
 719 static INLINE int
 720 get_next_xcoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
 721 {
 722    if ((face == 0 && fall_off_index != 1) ||
 723        (face == 1 && fall_off_index == 0) ||
 724        (face == 4 && fall_off_index == 0) ||
 725        (face == 5 && fall_off_index == 0)) {
 726       return max;
 727    }
 728    if ((face == 1 && fall_off_index != 0) ||
 729        (face == 0 && fall_off_index == 1) ||
 730        (face == 4 && fall_off_index == 1) ||
 731        (face == 5 && fall_off_index == 1)) {
 732       return 0;
 733    }
 734    if ((face == 4 && fall_off_index >= 2) ||
 735        (face == 2 && fall_off_index == 3) ||
 736        (face == 3 && fall_off_index == 2)) {
 737       return xc;
 738    }
 739    if ((face == 5 && fall_off_index >= 2) ||
 740        (face == 2 && fall_off_index == 2) ||
 741        (face == 3 && fall_off_index == 3)) {
 742       return max - xc;
 743    }
 744    if ((face == 2 && fall_off_index == 0) ||
 745        (face == 3 && fall_off_index == 1)) {
 746       return yc;
 747    }
 748    /* (face == 2 && fall_off_index == 1) ||
 749       (face == 3 && fall_off_index == 0)) */
 750    return max - yc;
 751 }
 752
 753 /*
 754  * return a new ycoord based on old face, old coords, cube size
 755  * and fall_off_index (0 for x-, 1 for x+, 2 for y-, 3 for y+)
 756  */
 757 static INLINE int
 758 get_next_ycoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
 759 {
 760    if ((fall_off_index <= 1) && (face <= 1 || face >= 4)) {
 761       return yc;
 762    }
 763    if (face == 2 ||
 764        (face == 4 && fall_off_index == 3) ||
 765        (face == 5 && fall_off_index == 2)) {
 766       return 0;
 767    }
 768    if (face == 3 ||
 769        (face == 4 && fall_off_index == 2) ||
 770        (face == 5 && fall_off_index == 3)) {
 771       return max;
 772    }
 773    if ((face == 0 && fall_off_index == 3) ||
 774        (face == 1 && fall_off_index == 2)) {
 775       return xc;
 776    }
 777    /* (face == 0 && fall_off_index == 2) ||
 778       (face == 1 && fall_off_index == 3) */
 779    return max - xc;
 780 }
 781
 782
 783 /* Gather a quad of adjacent texels within a tile:
 784  */
 785 static INLINE void
 786 get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_view *sp_sview,
 787                                         union tex_tile_address addr,
 788                                         unsigned x, unsigned y,
 789                                         const float *out[4])
 790 {
 791     const struct softpipe_tex_cached_tile *tile;
 792
 793    addr.bits.x = x / TEX_TILE_SIZE;
 794    addr.bits.y = y / TEX_TILE_SIZE;
 795    y %= TEX_TILE_SIZE;
 796    x %= TEX_TILE_SIZE;
 797
 798    tile = sp_get_cached_tile_tex(sp_sview->cache, addr);
 799
 800    out[0] = &tile->data.color[y  ][x  ][0];
 801    out[1] = &tile->data.color[y  ][x+1][0];
 802    out[2] = &tile->data.color[y+1][x  ][0];
 803    out[3] = &tile->data.color[y+1][x+1][0];
 804 }
 805
 806
 807 /* Gather a quad of potentially non-adjacent texels:
 808  */
 809 static INLINE void
 810 get_texel_quad_2d_no_border(const struct sp_sampler_view *sp_sview,
 811                             union tex_tile_address addr,
 812                             int x0, int y0,
 813                             int x1, int y1,
 814                             const float *out[4])
 815 {
 816    out[0] = get_texel_2d_no_border( sp_sview, addr, x0, y0 );
 817    out[1] = get_texel_2d_no_border( sp_sview, addr, x1, y0 );
 818    out[2] = get_texel_2d_no_border( sp_sview, addr, x0, y1 );
 819    out[3] = get_texel_2d_no_border( sp_sview, addr, x1, y1 );
 820 }
 821
 822 /* Can involve a lot of unnecessary checks for border color:
 823  */
 824 static INLINE void
 825 get_texel_quad_2d(const struct sp_sampler_view *sp_sview,
 826                   const struct sp_sampler *sp_samp,
 827                   union tex_tile_address addr,
 828                   int x0, int y0,
 829                   int x1, int y1,
 830                   const float *out[4])
 831 {
 832    out[0] = get_texel_2d( sp_sview, sp_samp, addr, x0, y0 );
 833    out[1] = get_texel_2d( sp_sview, sp_samp, addr, x1, y0 );
 834    out[3] = get_texel_2d( sp_sview, sp_samp, addr, x1, y1 );
 835    out[2] = get_texel_2d( sp_sview, sp_samp, addr, x0, y1 );
 836 }
 837
 838
 839
 840 /* 3d variants:
 841  */
 842 static INLINE const float *
 843 get_texel_3d_no_border(const struct sp_sampler_view *sp_sview,
 844                        union tex_tile_address addr, int x, int y, int z)
 845 {
 846    const struct softpipe_tex_cached_tile *tile;
 847
 848    addr.bits.x = x / TEX_TILE_SIZE;
 849    addr.bits.y = y / TEX_TILE_SIZE;
 850    addr.bits.z = z;
 851    y %= TEX_TILE_SIZE;
 852    x %= TEX_TILE_SIZE;
 853
 854    tile = sp_get_cached_tile_tex(sp_sview->cache, addr);
 855
 856    return &tile->data.color[y][x][0];
 857 }
 858
 859
 860 static INLINE const float *
 861 get_texel_3d(const struct sp_sampler_view *sp_sview,
 862              const struct sp_sampler *sp_samp,
 863              union tex_tile_address addr, int x, int y, int z)
 864 {
 865    const struct pipe_resource *texture = sp_sview->base.texture;
 866    unsigned level = addr.bits.level;
 867
 868    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 869        y < 0 || y >= (int) u_minify(texture->height0, level) ||
 870        z < 0 || z >= (int) u_minify(texture->depth0, level)) {
 871       return sp_samp->base.border_color.f;
 872    }
 873    else {
 874       return get_texel_3d_no_border( sp_sview, addr, x, y, z );
 875    }
 876 }
 877
 878
 879 /* Get texel pointer for 1D array texture */
 880 static INLINE const float *
 881 get_texel_1d_array(const struct sp_sampler_view *sp_sview,
 882                    const struct sp_sampler *sp_samp,
 883                    union tex_tile_address addr, int x, int y)
 884 {
 885    const struct pipe_resource *texture = sp_sview->base.texture;
 886    unsigned level = addr.bits.level;
 887
 888    if (x < 0 || x >= (int) u_minify(texture->width0, level)) {
 889       return sp_samp->base.border_color.f;
 890    }
 891    else {
 892       return get_texel_2d_no_border(sp_sview, addr, x, y);
 893    }
 894 }
 895
 896
 897 /* Get texel pointer for 2D array texture */
 898 static INLINE const float *
 899 get_texel_2d_array(const struct sp_sampler_view *sp_sview,
 900                    const struct sp_sampler *sp_samp,
 901                    union tex_tile_address addr, int x, int y, int layer)
 902 {
 903    const struct pipe_resource *texture = sp_sview->base.texture;
 904    unsigned level = addr.bits.level;
 905
 906    assert(layer < (int) texture->array_size);
 907    assert(layer >= 0);
 908
 909    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 910        y < 0 || y >= (int) u_minify(texture->height0, level)) {
 911       return sp_samp->base.border_color.f;
 912    }
 913    else {
 914       return get_texel_3d_no_border(sp_sview, addr, x, y, layer);
 915    }
 916 }
 917
 918
 919 static INLINE const float *
 920 get_texel_cube_seamless(const struct sp_sampler_view *sp_sview,
 921                         union tex_tile_address addr, int x, int y,
 922                         float *corner, int layer, unsigned face)
 923 {
 924    const struct pipe_resource *texture = sp_sview->base.texture;
 925    unsigned level = addr.bits.level;
 926    int new_x, new_y, max_x;
 927
 928    max_x = (int) u_minify(texture->width0, level);
 929
 930    assert(texture->width0 == texture->height0);
 931    new_x = x;
 932    new_y = y;
 933
 934    /* change the face */
 935    if (x < 0) {
 936       /*
 937        * Cheat with corners. They are difficult and I believe because we don't get
 938        * per-pixel faces we can actually have multiple corner texels per pixel,
 939        * which screws things up majorly in any case (as the per spec behavior is
 940        * to average the 3 remaining texels, which we might not have).
 941        * Hence just make sure that the 2nd coord is clamped, will simply pick the
 942        * sample which would have fallen off the x coord, but not y coord.
 943        * So the filter weight of the samples will be wrong, but at least this
 944        * ensures that only valid texels near the corner are used.
 945        */
 946       if (y < 0 || y >= max_x) {
 947          y = CLAMP(y, 0, max_x - 1);
 948       }
 949       new_x = get_next_xcoord(face, 0, max_x -1, x, y);
 950       new_y = get_next_ycoord(face, 0, max_x -1, x, y);
 951       face = get_next_face(face, 0);
 952    } else if (x >= max_x) {
 953       if (y < 0 || y >= max_x) {
 954          y = CLAMP(y, 0, max_x - 1);
 955       }
 956       new_x = get_next_xcoord(face, 1, max_x -1, x, y);
 957       new_y = get_next_ycoord(face, 1, max_x -1, x, y);
 958       face = get_next_face(face, 1);
 959    } else if (y < 0) {
 960       new_x = get_next_xcoord(face, 2, max_x -1, x, y);
 961       new_y = get_next_ycoord(face, 2, max_x -1, x, y);
 962       face = get_next_face(face, 2);
 963    } else if (y >= max_x) {
 964       new_x = get_next_xcoord(face, 3, max_x -1, x, y);
 965       new_y = get_next_ycoord(face, 3, max_x -1, x, y);
 966       face = get_next_face(face, 3);
 967    }
 968
 969    return get_texel_3d_no_border(sp_sview, addr, new_x, new_y, layer + face);
 970 }
 971
 972
 973 /* Get texel pointer for cube array texture */
 974 static INLINE const float *
 975 get_texel_cube_array(const struct sp_sampler_view *sp_sview,
 976                      const struct sp_sampler *sp_samp,
 977                      union tex_tile_address addr, int x, int y, int layer)
 978 {
 979    const struct pipe_resource *texture = sp_sview->base.texture;
 980    unsigned level = addr.bits.level;
 981
 982    assert(layer < (int) texture->array_size);
 983    assert(layer >= 0);
 984
 985    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 986        y < 0 || y >= (int) u_minify(texture->height0, level)) {
 987       return sp_samp->base.border_color.f;
 988    }
 989    else {
 990       return get_texel_3d_no_border(sp_sview, addr, x, y, layer);
 991    }
 992 }
 993 /**
 994  * Given the logbase2 of a mipmap's base level size and a mipmap level,
 995  * return the size (in texels) of that mipmap level.
 996  * For example, if level[0].width = 256 then base_pot will be 8.
 997  * If level = 2, then we'll return 64 (the width at level=2).
 998  * Return 1 if level > base_pot.
 999  */
1000 static INLINE unsigned
1001 pot_level_size(unsigned base_pot, unsigned level)
1002 {
1003    return (base_pot >= level) ? (1 << (base_pot - level)) : 1;
1004 }
1005
1006
1007 static void
1008 print_sample(const char *function, const float *rgba)
1009 {
1010    debug_printf("%s %g %g %g %g\n",
1011                 function,
1012                 rgba[0], rgba[TGSI_NUM_CHANNELS], rgba[2*TGSI_NUM_CHANNELS], rgba[3*TGSI_NUM_CHANNELS]);
1013 }
1014
1015
1016 static void
1017 print_sample_4(const char *function, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1018 {
1019    debug_printf("%s %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n",
1020                 function,
1021                 rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0],
1022                 rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1],
1023                 rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2],
1024                 rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]);
1025 }
1026
1027
1028 /* Some image-filter fastpaths:
1029  */
1030 static INLINE void
1031 img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
1032                                 struct sp_sampler *sp_samp,
1033                                 const struct img_filter_args *args,
1034                                 float *rgba)
1035 {
1036    unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
1037    unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
1038    int xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
1039    int ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
1040    union tex_tile_address addr;
1041    int c;
1042
1043    float u = args->s * xpot - 0.5F;
1044    float v = args->t * ypot - 0.5F;
1045
1046    int uflr = util_ifloor(u);
1047    int vflr = util_ifloor(v);
1048
1049    float xw = u - (float)uflr;
1050    float yw = v - (float)vflr;
1051
1052    int x0 = uflr & (xpot - 1);
1053    int y0 = vflr & (ypot - 1);
1054
1055    const float *tx[4];
1056
1057    addr.value = 0;
1058    addr.bits.level = args->level;
1059
1060    /* Can we fetch all four at once:
1061     */
1062    if (x0 < xmax && y0 < ymax) {
1063       get_texel_quad_2d_no_border_single_tile(sp_sview, addr, x0, y0, tx);
1064    }
1065    else {
1066       unsigned x1 = (x0 + 1) & (xpot - 1);
1067       unsigned y1 = (y0 + 1) & (ypot - 1);
1068       get_texel_quad_2d_no_border(sp_sview, addr, x0, y0, x1, y1, tx);
1069    }
1070
1071    /* interpolate R, G, B, A */
1072    for (c = 0; c < TGSI_QUAD_SIZE; c++) {
1073       rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1074                                        tx[0][c], tx[1][c],
1075                                        tx[2][c], tx[3][c]);
1076    }
1077
1078    if (DEBUG_TEX) {
1079       print_sample(__FUNCTION__, rgba);
1080    }
1081 }
1082
1083
1084 static INLINE void
1085 img_filter_2d_nearest_repeat_POT(struct sp_sampler_view *sp_sview,
1086                                  struct sp_sampler *sp_samp,
1087                                  const struct img_filter_args *args,
1088                                  float rgba[TGSI_QUAD_SIZE])
1089 {
1090    unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
1091    unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
1092    const float *out;
1093    union tex_tile_address addr;
1094    int c;
1095
1096    float u = args->s * xpot;
1097    float v = args->t * ypot;
1098
1099    int uflr = util_ifloor(u);
1100    int vflr = util_ifloor(v);
1101
1102    int x0 = uflr & (xpot - 1);
1103    int y0 = vflr & (ypot - 1);
1104
1105    addr.value = 0;
1106    addr.bits.level = args->level;
1107
1108    out = get_texel_2d_no_border(sp_sview, addr, x0, y0);
1109    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1110       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1111
1112    if (DEBUG_TEX) {
1113       print_sample(__FUNCTION__, rgba);
1114    }
1115 }
1116
1117
1118 static INLINE void
1119 img_filter_2d_nearest_clamp_POT(struct sp_sampler_view *sp_sview,
1120                                 struct sp_sampler *sp_samp,
1121                                 const struct img_filter_args *args,
1122                                 float rgba[TGSI_QUAD_SIZE])
1123 {
1124    unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
1125    unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
1126    union tex_tile_address addr;
1127    int c;
1128
1129    float u = args->s * xpot;
1130    float v = args->t * ypot;
1131
1132    int x0, y0;
1133    const float *out;
1134
1135    addr.value = 0;
1136    addr.bits.level = args->level;
1137
1138    x0 = util_ifloor(u);
1139    if (x0 < 0)
1140       x0 = 0;
1141    else if (x0 > (int) xpot - 1)
1142       x0 = xpot - 1;
1143
1144    y0 = util_ifloor(v);
1145    if (y0 < 0)
1146       y0 = 0;
1147    else if (y0 > (int) ypot - 1)
1148       y0 = ypot - 1;
1149
1150    out = get_texel_2d_no_border(sp_sview, addr, x0, y0);
1151    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1152       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1153
1154    if (DEBUG_TEX) {
1155       print_sample(__FUNCTION__, rgba);
1156    }
1157 }
1158
1159
1160 static void
1161 img_filter_1d_nearest(struct sp_sampler_view *sp_sview,
1162                       struct sp_sampler *sp_samp,
1163                       const struct img_filter_args *args,
1164                       float rgba[TGSI_QUAD_SIZE])
1165 {
1166    const struct pipe_resource *texture = sp_sview->base.texture;
1167    int width;
1168    int x;
1169    union tex_tile_address addr;
1170    const float *out;
1171    int c;
1172
1173    width = u_minify(texture->width0, args->level);
1174
1175    assert(width > 0);
1176
1177    addr.value = 0;
1178    addr.bits.level = args->level;
1179
1180    sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1181
1182    out = get_texel_2d(sp_sview, sp_samp, addr, x, 0);
1183    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1184       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1185
1186    if (DEBUG_TEX) {
1187       print_sample(__FUNCTION__, rgba);
1188    }
1189 }
1190
1191
1192 static void
1193 img_filter_1d_array_nearest(struct sp_sampler_view *sp_sview,
1194                             struct sp_sampler *sp_samp,
1195                             const struct img_filter_args *args,
1196                             float *rgba)
1197 {
1198    const struct pipe_resource *texture = sp_sview->base.texture;
1199    int width;
1200    int x, layer;
1201    union tex_tile_address addr;
1202    const float *out;
1203    int c;
1204
1205    width = u_minify(texture->width0, args->level);
1206
1207    assert(width > 0);
1208
1209    addr.value = 0;
1210    addr.bits.level = args->level;
1211
1212    sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1213    layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
1214                           sp_sview->base.u.tex.last_layer);
1215
1216    out = get_texel_1d_array(sp_sview, sp_samp, addr, x, layer);
1217    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1218       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1219
1220    if (DEBUG_TEX) {
1221       print_sample(__FUNCTION__, rgba);
1222    }
1223 }
1224
1225
1226 static void
1227 img_filter_2d_nearest(struct sp_sampler_view *sp_sview,
1228                       struct sp_sampler *sp_samp,
1229                       const struct img_filter_args *args,
1230                       float *rgba)
1231 {
1232    const struct pipe_resource *texture = sp_sview->base.texture;
1233    int width, height;
1234    int x, y;
1235    union tex_tile_address addr;
1236    const float *out;
1237    int c;
1238
1239    width = u_minify(texture->width0, args->level);
1240    height = u_minify(texture->height0, args->level);
1241
1242    assert(width > 0);
1243    assert(height > 0);
1244
1245    addr.value = 0;
1246    addr.bits.level = args->level;
1247
1248    sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1249    sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
1250
1251    out = get_texel_2d(sp_sview, sp_samp, addr, x, y);
1252    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1253       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1254
1255    if (DEBUG_TEX) {
1256       print_sample(__FUNCTION__, rgba);
1257    }
1258 }
1259
1260
1261 static void
1262 img_filter_2d_array_nearest(struct sp_sampler_view *sp_sview,
1263                             struct sp_sampler *sp_samp,
1264                             const struct img_filter_args *args,
1265                             float *rgba)
1266 {
1267    const struct pipe_resource *texture = sp_sview->base.texture;
1268    int width, height;
1269    int x, y, layer;
1270    union tex_tile_address addr;
1271    const float *out;
1272    int c;
1273
1274    width = u_minify(texture->width0, args->level);
1275    height = u_minify(texture->height0, args->level);
1276
1277    assert(width > 0);
1278    assert(height > 0);
1279
1280    addr.value = 0;
1281    addr.bits.level = args->level;
1282
1283    sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1284    sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
1285    layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
1286                           sp_sview->base.u.tex.last_layer);
1287
1288    out = get_texel_2d_array(sp_sview, sp_samp, addr, x, y, layer);
1289    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1290       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1291
1292    if (DEBUG_TEX) {
1293       print_sample(__FUNCTION__, rgba);
1294    }
1295 }
1296
1297
1298 static void
1299 img_filter_cube_nearest(struct sp_sampler_view *sp_sview,
1300                         struct sp_sampler *sp_samp,
1301                         const struct img_filter_args *args,
1302                         float *rgba)
1303 {
1304    const struct pipe_resource *texture = sp_sview->base.texture;
1305    int width, height;
1306    int x, y, layerface;
1307    union tex_tile_address addr;
1308    const float *out;
1309    int c;
1310
1311    width = u_minify(texture->width0, args->level);
1312    height = u_minify(texture->height0, args->level);
1313
1314    assert(width > 0);
1315    assert(height > 0);
1316
1317    addr.value = 0;
1318    addr.bits.level = args->level;
1319
1320    /*
1321     * If NEAREST filtering is done within a miplevel, always apply wrap
1322     * mode CLAMP_TO_EDGE.
1323     */
1324    if (sp_samp->base.seamless_cube_map) {
1325       wrap_nearest_clamp_to_edge(args->s, width, args->offset[0], &x);
1326       wrap_nearest_clamp_to_edge(args->t, height, args->offset[1], &y);
1327    } else {
1328       /* Would probably make sense to ignore mode and just do edge clamp */
1329       sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1330       sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
1331    }
1332
1333    layerface = args->face_id + sp_sview->base.u.tex.first_layer;
1334    out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface);
1335    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1336       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1337
1338    if (DEBUG_TEX) {
1339       print_sample(__FUNCTION__, rgba);
1340    }
1341 }
1342
1343 static void
1344 img_filter_cube_array_nearest(struct sp_sampler_view *sp_sview,
1345                               struct sp_sampler *sp_samp,
1346                               const struct img_filter_args *args,
1347                               float *rgba)
1348 {
1349    const struct pipe_resource *texture = sp_sview->base.texture;
1350    int width, height;
1351    int x, y, layerface;
1352    union tex_tile_address addr;
1353    const float *out;
1354    int c;
1355
1356    width = u_minify(texture->width0, args->level);
1357    height = u_minify(texture->height0, args->level);
1358
1359    assert(width > 0);
1360    assert(height > 0);
1361
1362    addr.value = 0;
1363    addr.bits.level = args->level;
1364
1365    sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1366    sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
1367    layerface = coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
1368                               sp_sview->base.u.tex.first_layer,
1369                               sp_sview->base.u.tex.last_layer - 5) + args->face_id;
1370
1371    out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface);
1372    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1373       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1374
1375    if (DEBUG_TEX) {
1376       print_sample(__FUNCTION__, rgba);
1377    }
1378 }
1379
1380 static void
1381 img_filter_3d_nearest(struct sp_sampler_view *sp_sview,
1382                       struct sp_sampler *sp_samp,
1383                       const struct img_filter_args *args,
1384                       float *rgba)
1385 {
1386    const struct pipe_resource *texture = sp_sview->base.texture;
1387    int width, height, depth;
1388    int x, y, z;
1389    union tex_tile_address addr;
1390    const float *out;
1391    int c;
1392
1393    width = u_minify(texture->width0, args->level);
1394    height = u_minify(texture->height0, args->level);
1395    depth = u_minify(texture->depth0, args->level);
1396
1397    assert(width > 0);
1398    assert(height > 0);
1399    assert(depth > 0);
1400
1401    sp_samp->nearest_texcoord_s(args->s, width,  args->offset[0], &x);
1402    sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
1403    sp_samp->nearest_texcoord_p(args->p, depth,  args->offset[2], &z);
1404
1405    addr.value = 0;
1406    addr.bits.level = args->level;
1407
1408    out = get_texel_3d(sp_sview, sp_samp, addr, x, y, z);
1409    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1410       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1411 }
1412
1413
1414 static void
1415 img_filter_1d_linear(struct sp_sampler_view *sp_sview,
1416                      struct sp_sampler *sp_samp,
1417                      const struct img_filter_args *args,
1418                      float *rgba)
1419 {
1420    const struct pipe_resource *texture = sp_sview->base.texture;
1421    int width;
1422    int x0, x1;
1423    float xw; /* weights */
1424    union tex_tile_address addr;
1425    const float *tx0, *tx1;
1426    int c;
1427
1428    width = u_minify(texture->width0, args->level);
1429
1430    assert(width > 0);
1431
1432    addr.value = 0;
1433    addr.bits.level = args->level;
1434
1435    sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
1436
1437    tx0 = get_texel_2d(sp_sview, sp_samp, addr, x0, 0);
1438    tx1 = get_texel_2d(sp_sview, sp_samp, addr, x1, 0);
1439
1440    /* interpolate R, G, B, A */
1441    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1442       rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
1443 }
1444
1445
1446 static void
1447 img_filter_1d_array_linear(struct sp_sampler_view *sp_sview,
1448                            struct sp_sampler *sp_samp,
1449                            const struct img_filter_args *args,
1450                            float *rgba)
1451 {
1452    const struct pipe_resource *texture = sp_sview->base.texture;
1453    int width;
1454    int x0, x1, layer;
1455    float xw; /* weights */
1456    union tex_tile_address addr;
1457    const float *tx0, *tx1;
1458    int c;
1459
1460    width = u_minify(texture->width0, args->level);
1461
1462    assert(width > 0);
1463
1464    addr.value = 0;
1465    addr.bits.level = args->level;
1466
1467    sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
1468    layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
1469                           sp_sview->base.u.tex.last_layer);
1470
1471    tx0 = get_texel_1d_array(sp_sview, sp_samp, addr, x0, layer);
1472    tx1 = get_texel_1d_array(sp_sview, sp_samp, addr, x1, layer);
1473
1474    /* interpolate R, G, B, A */
1475    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1476       rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
1477 }
1478
1479 /*
1480  * Retrieve the gathered value, need to convert to the
1481  * TGSI expected interface, and take component select
1482  * and swizzling into account.
1483  */
1484 static float
1485 get_gather_value(const struct sp_sampler_view *sp_sview,
1486                  int chan_in, int comp_sel,
1487                  const float *tx[4])
1488 {
1489    int chan;
1490    unsigned swizzle;
1491
1492    /*
1493     * softpipe samples in a different order
1494     * to TGSI expects, so we need to swizzle,
1495     * the samples into the correct slots.
1496     */
1497    switch (chan_in) {
1498    case 0:
1499       chan = 2;
1500       break;
1501    case 1:
1502       chan = 3;
1503       break;
1504    case 2:
1505       chan = 1;
1506       break;
1507    case 3:
1508       chan = 0;
1509       break;
1510    default:
1511       assert(0);
1512       return 0.0;
1513    }
1514
1515    /* pick which component to use for the swizzle */
1516    switch (comp_sel) {
1517    case 0:
1518       swizzle = sp_sview->base.swizzle_r;
1519       break;
1520    case 1:
1521       swizzle = sp_sview->base.swizzle_g;
1522       break;
1523    case 2:
1524       swizzle = sp_sview->base.swizzle_b;
1525       break;
1526    case 3:
1527       swizzle = sp_sview->base.swizzle_a;
1528       break;
1529    default:
1530       assert(0);
1531       return 0.0;
1532    }
1533
1534    /* get correct result using the channel and swizzle */
1535    switch (swizzle) {
1536    case PIPE_SWIZZLE_ZERO:
1537       return 0.0;
1538    case PIPE_SWIZZLE_ONE:
1539       return 1.0;
1540    default:
1541       return tx[chan][swizzle];
1542    }
1543 }
1544
1545
1546 static void
1547 img_filter_2d_linear(struct sp_sampler_view *sp_sview,
1548                      struct sp_sampler *sp_samp,
1549                      const struct img_filter_args *args,
1550                      float *rgba)
1551 {
1552    const struct pipe_resource *texture = sp_sview->base.texture;
1553    int width, height;
1554    int x0, y0, x1, y1;
1555    float xw, yw; /* weights */
1556    union tex_tile_address addr;
1557    const float *tx[4];
1558    int c;
1559
1560    width = u_minify(texture->width0, args->level);
1561    height = u_minify(texture->height0, args->level);
1562
1563    assert(width > 0);
1564    assert(height > 0);
1565
1566    addr.value = 0;
1567    addr.bits.level = args->level;
1568
1569    sp_samp->linear_texcoord_s(args->s, width,  args->offset[0], &x0, &x1, &xw);
1570    sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
1571
1572    tx[0] = get_texel_2d(sp_sview, sp_samp, addr, x0, y0);
1573    tx[1] = get_texel_2d(sp_sview, sp_samp, addr, x1, y0);
1574    tx[2] = get_texel_2d(sp_sview, sp_samp, addr, x0, y1);
1575    tx[3] = get_texel_2d(sp_sview, sp_samp, addr, x1, y1);
1576
1577    if (args->gather_only) {
1578       for (c = 0; c < TGSI_QUAD_SIZE; c++)
1579          rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
1580                                                       args->gather_comp,
1581                                                       tx);
1582    } else {
1583       /* interpolate R, G, B, A */
1584       for (c = 0; c < TGSI_QUAD_SIZE; c++)
1585          rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1586                                              tx[0][c], tx[1][c],
1587                                              tx[2][c], tx[3][c]);
1588    }
1589 }
1590
1591
1592 static void
1593 img_filter_2d_array_linear(struct sp_sampler_view *sp_sview,
1594                            struct sp_sampler *sp_samp,
1595                            const struct img_filter_args *args,
1596                            float *rgba)
1597 {
1598    const struct pipe_resource *texture = sp_sview->base.texture;
1599    int width, height;
1600    int x0, y0, x1, y1, layer;
1601    float xw, yw; /* weights */
1602    union tex_tile_address addr;
1603    const float *tx[4];
1604    int c;
1605
1606    width = u_minify(texture->width0, args->level);
1607    height = u_minify(texture->height0, args->level);
1608
1609    assert(width > 0);
1610    assert(height > 0);
1611
1612    addr.value = 0;
1613    addr.bits.level = args->level;
1614
1615    sp_samp->linear_texcoord_s(args->s, width,  args->offset[0], &x0, &x1, &xw);
1616    sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
1617    layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
1618                           sp_sview->base.u.tex.last_layer);
1619
1620    tx[0] = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y0, layer);
1621    tx[1] = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y0, layer);
1622    tx[2] = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y1, layer);
1623    tx[3] = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y1, layer);
1624
1625    if (args->gather_only) {
1626       for (c = 0; c < TGSI_QUAD_SIZE; c++)
1627          rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
1628                                                       args->gather_comp,
1629                                                       tx);
1630    } else {
1631       /* interpolate R, G, B, A */
1632       for (c = 0; c < TGSI_QUAD_SIZE; c++)
1633          rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1634                                              tx[0][c], tx[1][c],
1635                                              tx[2][c], tx[3][c]);
1636    }
1637 }
1638
1639
1640 static void
1641 img_filter_cube_linear(struct sp_sampler_view *sp_sview,
1642                        struct sp_sampler *sp_samp,
1643                        const struct img_filter_args *args,
1644                        float *rgba)
1645 {
1646    const struct pipe_resource *texture = sp_sview->base.texture;
1647    int width, height;
1648    int x0, y0, x1, y1, layer;
1649    float xw, yw; /* weights */
1650    union tex_tile_address addr;
1651    const float *tx[4];
1652    float corner0[TGSI_QUAD_SIZE], corner1[TGSI_QUAD_SIZE],
1653          corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
1654    int c;
1655
1656    width = u_minify(texture->width0, args->level);
1657    height = u_minify(texture->height0, args->level);
1658
1659    assert(width > 0);
1660    assert(height > 0);
1661
1662    addr.value = 0;
1663    addr.bits.level = args->level;
1664
1665    /*
1666     * For seamless if LINEAR filtering is done within a miplevel,
1667     * always apply wrap mode CLAMP_TO_BORDER.
1668     */
1669    if (sp_samp->base.seamless_cube_map) {
1670       /* Note this is a bit overkill, actual clamping is not required */
1671       wrap_linear_clamp_to_border(args->s, width, args->offset[0], &x0, &x1, &xw);
1672       wrap_linear_clamp_to_border(args->t, height, args->offset[1], &y0, &y1, &yw);
1673    } else {
1674       /* Would probably make sense to ignore mode and just do edge clamp */
1675       sp_samp->linear_texcoord_s(args->s, width,  args->offset[0], &x0, &x1, &xw);
1676       sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
1677    }
1678
1679    layer = sp_sview->base.u.tex.first_layer;
1680
1681    if (sp_samp->base.seamless_cube_map) {
1682       tx[0] = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id);
1683       tx[1] = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id);
1684       tx[2] = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, args->face_id);
1685       tx[3] = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, args->face_id);
1686    } else {
1687       tx[0] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + args->face_id);
1688       tx[1] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + args->face_id);
1689       tx[2] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + args->face_id);
1690       tx[3] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id);
1691    }
1692
1693    if (args->gather_only) {
1694       for (c = 0; c < TGSI_QUAD_SIZE; c++)
1695          rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
1696                                                       args->gather_comp,
1697                                                       tx);
1698    } else {
1699       /* interpolate R, G, B, A */
1700       for (c = 0; c < TGSI_QUAD_SIZE; c++)
1701          rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1702                                              tx[0][c], tx[1][c],
1703                                              tx[2][c], tx[3][c]);
1704    }
1705 }
1706
1707
1708 static void
1709 img_filter_cube_array_linear(struct sp_sampler_view *sp_sview,
1710                              struct sp_sampler *sp_samp,
1711                              const struct img_filter_args *args,
1712                              float *rgba)
1713 {
1714    const struct pipe_resource *texture = sp_sview->base.texture;
1715    int width, height;
1716    int x0, y0, x1, y1, layer;
1717    float xw, yw; /* weights */
1718    union tex_tile_address addr;
1719    const float *tx[4];
1720    float corner0[TGSI_QUAD_SIZE], corner1[TGSI_QUAD_SIZE],
1721          corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
1722    int c;
1723
1724    width = u_minify(texture->width0, args->level);
1725    height = u_minify(texture->height0, args->level);
1726
1727    assert(width > 0);
1728    assert(height > 0);
1729
1730    addr.value = 0;
1731    addr.bits.level = args->level;
1732
1733    /*
1734     * For seamless if LINEAR filtering is done within a miplevel,
1735     * always apply wrap mode CLAMP_TO_BORDER.
1736     */
1737    if (sp_samp->base.seamless_cube_map) {
1738       /* Note this is a bit overkill, actual clamping is not required */
1739       wrap_linear_clamp_to_border(args->s, width, args->offset[0], &x0, &x1, &xw);
1740       wrap_linear_clamp_to_border(args->t, height, args->offset[1], &y0, &y1, &yw);
1741    } else {
1742       /* Would probably make sense to ignore mode and just do edge clamp */
1743       sp_samp->linear_texcoord_s(args->s, width,  args->offset[0], &x0, &x1, &xw);
1744       sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
1745    }
1746
1747    layer = coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
1748                           sp_sview->base.u.tex.first_layer,
1749                           sp_sview->base.u.tex.last_layer - 5);
1750
1751    if (sp_samp->base.seamless_cube_map) {
1752       tx[0] = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id);
1753       tx[1] = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id);
1754       tx[2] = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, args->face_id);
1755       tx[3] = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, args->face_id);
1756    } else {
1757       tx[0] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + args->face_id);
1758       tx[1] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + args->face_id);
1759       tx[2] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + args->face_id);
1760       tx[3] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id);
1761    }
1762
1763    if (args->gather_only) {
1764       for (c = 0; c < TGSI_QUAD_SIZE; c++)
1765          rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
1766                                                       args->gather_comp,
1767                                                       tx);
1768    } else {
1769       /* interpolate R, G, B, A */
1770       for (c = 0; c < TGSI_QUAD_SIZE; c++)
1771          rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1772                                              tx[0][c], tx[1][c],
1773                                              tx[2][c], tx[3][c]);
1774    }
1775 }
1776
1777 static void
1778 img_filter_3d_linear(struct sp_sampler_view *sp_sview,
1779                      struct sp_sampler *sp_samp,
1780                      const struct img_filter_args *args,
1781                      float *rgba)
1782 {
1783    const struct pipe_resource *texture = sp_sview->base.texture;
1784    int width, height, depth;
1785    int x0, x1, y0, y1, z0, z1;
1786    float xw, yw, zw; /* interpolation weights */
1787    union tex_tile_address addr;
1788    const float *tx00, *tx01, *tx02, *tx03, *tx10, *tx11, *tx12, *tx13;
1789    int c;
1790
1791    width = u_minify(texture->width0, args->level);
1792    height = u_minify(texture->height0, args->level);
1793    depth = u_minify(texture->depth0, args->level);
1794
1795    addr.value = 0;
1796    addr.bits.level = args->level;
1797
1798    assert(width > 0);
1799    assert(height > 0);
1800    assert(depth > 0);
1801
1802    sp_samp->linear_texcoord_s(args->s, width,  args->offset[0], &x0, &x1, &xw);
1803    sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
1804    sp_samp->linear_texcoord_p(args->p, depth,  args->offset[2], &z0, &z1, &zw);
1805
1806    tx00 = get_texel_3d(sp_sview, sp_samp, addr, x0, y0, z0);
1807    tx01 = get_texel_3d(sp_sview, sp_samp, addr, x1, y0, z0);
1808    tx02 = get_texel_3d(sp_sview, sp_samp, addr, x0, y1, z0);
1809    tx03 = get_texel_3d(sp_sview, sp_samp, addr, x1, y1, z0);
1810
1811    tx10 = get_texel_3d(sp_sview, sp_samp, addr, x0, y0, z1);
1812    tx11 = get_texel_3d(sp_sview, sp_samp, addr, x1, y0, z1);
1813    tx12 = get_texel_3d(sp_sview, sp_samp, addr, x0, y1, z1);
1814    tx13 = get_texel_3d(sp_sview, sp_samp, addr, x1, y1, z1);
1815
1816       /* interpolate R, G, B, A */
1817    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1818       rgba[TGSI_NUM_CHANNELS*c] =  lerp_3d(xw, yw, zw,
1819                                            tx00[c], tx01[c],
1820                                            tx02[c], tx03[c],
1821                                            tx10[c], tx11[c],
1822                                            tx12[c], tx13[c]);
1823 }
1824
1825
1826 /* Calculate level of detail for every fragment,
1827  * with lambda already computed.
1828  * Note that lambda has already been biased by global LOD bias.
1829  * \param biased_lambda per-quad lambda.
1830  * \param lod_in per-fragment lod_bias or explicit_lod.
1831  * \param lod returns the per-fragment lod.
1832  */
1833 static INLINE void
1834 compute_lod(const struct pipe_sampler_state *sampler,
1835             enum tgsi_sampler_control control,
1836             const float biased_lambda,
1837             const float lod_in[TGSI_QUAD_SIZE],
1838             float lod[TGSI_QUAD_SIZE])
1839 {
1840    float min_lod = sampler->min_lod;
1841    float max_lod = sampler->max_lod;
1842    uint i;
1843
1844    switch (control) {
1845    case tgsi_sampler_lod_none:
1846    case tgsi_sampler_lod_zero:
1847    /* XXX FIXME */
1848    case tgsi_sampler_derivs_explicit:
1849       lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(biased_lambda, min_lod, max_lod);
1850       break;
1851    case tgsi_sampler_lod_bias:
1852       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1853          lod[i] = biased_lambda + lod_in[i];
1854          lod[i] = CLAMP(lod[i], min_lod, max_lod);
1855       }
1856       break;
1857    case tgsi_sampler_lod_explicit:
1858       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1859          lod[i] = CLAMP(lod_in[i], min_lod, max_lod);
1860       }
1861       break;
1862    default:
1863       assert(0);
1864       lod[0] = lod[1] = lod[2] = lod[3] = 0.0f;
1865    }
1866 }
1867
1868
1869 /* Calculate level of detail for every fragment.
1870  * \param lod_in per-fragment lod_bias or explicit_lod.
1871  * \param lod results per-fragment lod.
1872  */
1873 static INLINE void
1874 compute_lambda_lod(struct sp_sampler_view *sp_sview,
1875                    struct sp_sampler *sp_samp,
1876                    const float s[TGSI_QUAD_SIZE],
1877                    const float t[TGSI_QUAD_SIZE],
1878                    const float p[TGSI_QUAD_SIZE],
1879                    const float lod_in[TGSI_QUAD_SIZE],
1880                    enum tgsi_sampler_control control,
1881                    float lod[TGSI_QUAD_SIZE])
1882 {
1883    const struct pipe_sampler_state *sampler = &sp_samp->base;
1884    float lod_bias = sampler->lod_bias;
1885    float min_lod = sampler->min_lod;
1886    float max_lod = sampler->max_lod;
1887    float lambda;
1888    uint i;
1889
1890    switch (control) {
1891    case tgsi_sampler_lod_none:
1892    case tgsi_sampler_gather:
1893       /* XXX FIXME */
1894    case tgsi_sampler_derivs_explicit:
1895       lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
1896       lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lambda, min_lod, max_lod);
1897       break;
1898    case tgsi_sampler_lod_bias:
1899       lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
1900       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1901          lod[i] = lambda + lod_in[i];
1902          lod[i] = CLAMP(lod[i], min_lod, max_lod);
1903       }
1904       break;
1905    case tgsi_sampler_lod_explicit:
1906       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1907          lod[i] = CLAMP(lod_in[i], min_lod, max_lod);
1908       }
1909       break;
1910    case tgsi_sampler_lod_zero:
1911       /* this is all static state in the sampler really need clamp here? */
1912       lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lod_bias, min_lod, max_lod);
1913       break;
1914    default:
1915       assert(0);
1916       lod[0] = lod[1] = lod[2] = lod[3] = 0.0f;
1917    }
1918 }
1919
1920 static INLINE unsigned
1921 get_gather_component(const float lod_in[TGSI_QUAD_SIZE])
1922 {
1923    /* gather component is stored in lod_in slot as unsigned */
1924    return (*(unsigned int *)lod_in) & 0x3;
1925 }
1926
1927 static void
1928 mip_filter_linear(struct sp_sampler_view *sp_sview,
1929                   struct sp_sampler *sp_samp,
1930                   img_filter_func min_filter,
1931                   img_filter_func mag_filter,
1932                   const float s[TGSI_QUAD_SIZE],
1933                   const float t[TGSI_QUAD_SIZE],
1934                   const float p[TGSI_QUAD_SIZE],
1935                   const float c0[TGSI_QUAD_SIZE],
1936                   const float lod_in[TGSI_QUAD_SIZE],
1937                   const struct filter_args *filt_args,
1938                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1939 {
1940    const struct pipe_sampler_view *psview = &sp_sview->base;
1941    int j;
1942    float lod[TGSI_QUAD_SIZE];
1943    struct img_filter_args args;
1944
1945    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
1946
1947    args.offset = filt_args->offset;
1948    args.gather_only = filt_args->control == tgsi_sampler_gather;
1949    args.gather_comp = get_gather_component(lod_in);
1950
1951    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1952       int level0 = psview->u.tex.first_level + (int)lod[j];
1953
1954       args.s = s[j];
1955       args.t = t[j];
1956       args.p = p[j];
1957       args.face_id = sp_sview->faces[j];
1958
1959       if (lod[j] < 0.0) {
1960          args.level = psview->u.tex.first_level;
1961          mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
1962       }
1963       else if (level0 >= (int) psview->u.tex.last_level) {
1964          args.level = psview->u.tex.last_level;
1965          min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
1966       }
1967       else {
1968          float levelBlend = frac(lod[j]);
1969          float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
1970          int c;
1971
1972          args.level = level0;
1973          min_filter(sp_sview, sp_samp, &args, &rgbax[0][0]);
1974          args.level = level0+1;
1975          min_filter(sp_sview, sp_samp, &args, &rgbax[0][1]);
1976
1977          for (c = 0; c < 4; c++) {
1978             rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
1979          }
1980       }
1981    }
1982
1983    if (DEBUG_TEX) {
1984       print_sample_4(__FUNCTION__, rgba);
1985    }
1986 }
1987
1988
1989 /**
1990  * Compute nearest mipmap level from texcoords.
1991  * Then sample the texture level for four elements of a quad.
1992  * \param c0  the LOD bias factors, or absolute LODs (depending on control)
1993  */
1994 static void
1995 mip_filter_nearest(struct sp_sampler_view *sp_sview,
1996                    struct sp_sampler *sp_samp,
1997                    img_filter_func min_filter,
1998                    img_filter_func mag_filter,
1999                    const float s[TGSI_QUAD_SIZE],
2000                    const float t[TGSI_QUAD_SIZE],
2001                    const float p[TGSI_QUAD_SIZE],
2002                    const float c0[TGSI_QUAD_SIZE],
2003                    const float lod_in[TGSI_QUAD_SIZE],
2004                    const struct filter_args *filt_args,
2005                    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2006 {
2007    const struct pipe_sampler_view *psview = &sp_sview->base;
2008    float lod[TGSI_QUAD_SIZE];
2009    int j;
2010    struct img_filter_args args;
2011
2012    args.offset = filt_args->offset;
2013    args.gather_only = filt_args->control == tgsi_sampler_gather;
2014    args.gather_comp = get_gather_component(lod_in);
2015
2016    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
2017
2018    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2019       args.s = s[j];
2020       args.t = t[j];
2021       args.p = p[j];
2022       args.face_id = sp_sview->faces[j];
2023
2024       if (lod[j] < 0.0) {
2025          args.level = psview->u.tex.first_level;
2026          mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2027       } else {
2028          int level = psview->u.tex.first_level + (int)(lod[j] + 0.5F);
2029          args.level = MIN2(level, (int)psview->u.tex.last_level);
2030          min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2031       }
2032    }
2033
2034    if (DEBUG_TEX) {
2035       print_sample_4(__FUNCTION__, rgba);
2036    }
2037 }
2038
2039
2040 static void
2041 mip_filter_none(struct sp_sampler_view *sp_sview,
2042                 struct sp_sampler *sp_samp,
2043                 img_filter_func min_filter,
2044                 img_filter_func mag_filter,
2045                 const float s[TGSI_QUAD_SIZE],
2046                 const float t[TGSI_QUAD_SIZE],
2047                 const float p[TGSI_QUAD_SIZE],
2048                 const float c0[TGSI_QUAD_SIZE],
2049                 const float lod_in[TGSI_QUAD_SIZE],
2050                 const struct filter_args *filt_args,
2051                 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2052 {
2053    float lod[TGSI_QUAD_SIZE];
2054    int j;
2055    struct img_filter_args args;
2056
2057    args.level = sp_sview->base.u.tex.first_level;
2058    args.offset = filt_args->offset;
2059    args.gather_only = filt_args->control == tgsi_sampler_gather;
2060
2061    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
2062
2063    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2064       args.s = s[j];
2065       args.t = t[j];
2066       args.p = p[j];
2067       args.face_id = sp_sview->faces[j];
2068       if (lod[j] < 0.0) {
2069          mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2070       }
2071       else {
2072          min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2073       }
2074    }
2075 }
2076
2077
2078 static void
2079 mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview,
2080                                  struct sp_sampler *sp_samp,
2081                                  img_filter_func min_filter,
2082                                  img_filter_func mag_filter,
2083                                  const float s[TGSI_QUAD_SIZE],
2084                                  const float t[TGSI_QUAD_SIZE],
2085                                  const float p[TGSI_QUAD_SIZE],
2086                                  const float c0[TGSI_QUAD_SIZE],
2087                                  const float lod_in[TGSI_QUAD_SIZE],
2088                                  const struct filter_args *filt_args,
2089                                  float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2090 {
2091    int j;
2092    struct img_filter_args args;
2093    args.level = sp_sview->base.u.tex.first_level;
2094    args.offset = filt_args->offset;
2095    args.gather_only = filt_args->control == tgsi_sampler_gather;
2096    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2097       args.s = s[j];
2098       args.t = t[j];
2099       args.p = p[j];
2100       args.face_id = sp_sview->faces[j];
2101       mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2102    }
2103 }
2104
2105
2106 /* For anisotropic filtering */
2107 #define WEIGHT_LUT_SIZE 1024
2108
2109 static float *weightLut = NULL;
2110
2111 /**
2112  * Creates the look-up table used to speed-up EWA sampling
2113  */
2114 static void
2115 create_filter_table(void)
2116 {
2117    unsigned i;
2118    if (!weightLut) {
2119       weightLut = (float *) MALLOC(WEIGHT_LUT_SIZE * sizeof(float));
2120
2121       for (i = 0; i < WEIGHT_LUT_SIZE; ++i) {
2122          float alpha = 2;
2123          float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
2124          float weight = (float) exp(-alpha * r2);
2125          weightLut[i] = weight;
2126       }
2127    }
2128 }
2129
2130
2131 /**
2132  * Elliptical weighted average (EWA) filter for producing high quality
2133  * anisotropic filtered results.
2134  * Based on the Higher Quality Elliptical Weighted Average Filter
2135  * published by Paul S. Heckbert in his Master's Thesis
2136  * "Fundamentals of Texture Mapping and Image Warping" (1989)
2137  */
2138 static void
2139 img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
2140                   struct sp_sampler *sp_samp,
2141                   img_filter_func min_filter,
2142                   img_filter_func mag_filter,
2143                   const float s[TGSI_QUAD_SIZE],
2144                   const float t[TGSI_QUAD_SIZE],
2145                   const float p[TGSI_QUAD_SIZE],
2146                   unsigned level,
2147                   const float dudx, const float dvdx,
2148                   const float dudy, const float dvdy,
2149                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2150 {
2151    const struct pipe_resource *texture = sp_sview->base.texture;
2152
2153    // ??? Won't the image filters blow up if level is negative?
2154    unsigned level0 = level > 0 ? level : 0;
2155    float scaling = 1.0f / (1 << level0);
2156    int width = u_minify(texture->width0, level0);
2157    int height = u_minify(texture->height0, level0);
2158    struct img_filter_args args;
2159    float ux = dudx * scaling;
2160    float vx = dvdx * scaling;
2161    float uy = dudy * scaling;
2162    float vy = dvdy * scaling;
2163
2164    /* compute ellipse coefficients to bound the region:
2165     * A*x*x + B*x*y + C*y*y = F.
2166     */
2167    float A = vx*vx+vy*vy+1;
2168    float B = -2*(ux*vx+uy*vy);
2169    float C = ux*ux+uy*uy+1;
2170    float F = A*C-B*B/4.0f;
2171
2172    /* check if it is an ellipse */
2173    /* assert(F > 0.0); */
2174
2175    /* Compute the ellipse's (u,v) bounding box in texture space */
2176    float d = -B*B+4.0f*C*A;
2177    float box_u = 2.0f / d * sqrtf(d*C*F); /* box_u -> half of bbox with   */
2178    float box_v = 2.0f / d * sqrtf(A*d*F); /* box_v -> half of bbox height */
2179
2180    float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2181    float s_buffer[TGSI_QUAD_SIZE];
2182    float t_buffer[TGSI_QUAD_SIZE];
2183    float weight_buffer[TGSI_QUAD_SIZE];
2184    unsigned buffer_next;
2185    int j;
2186    float den; /* = 0.0F; */
2187    float ddq;
2188    float U; /* = u0 - tex_u; */
2189    int v;
2190
2191    /* Scale ellipse formula to directly index the Filter Lookup Table.
2192     * i.e. scale so that F = WEIGHT_LUT_SIZE-1
2193     */
2194    double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F;
2195    A *= formScale;
2196    B *= formScale;
2197    C *= formScale;
2198    /* F *= formScale; */ /* no need to scale F as we don't use it below here */
2199
2200    /* For each quad, the du and dx values are the same and so the ellipse is
2201     * also the same. Note that texel/image access can only be performed using
2202     * a quad, i.e. it is not possible to get the pixel value for a single
2203     * tex coord. In order to have a better performance, the access is buffered
2204     * using the s_buffer/t_buffer and weight_buffer. Only when the buffer is
2205     * full, then the pixel values are read from the image.
2206     */
2207    ddq = 2 * A;
2208
2209    args.level = level;
2210    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2211       /* Heckbert MS thesis, p. 59; scan over the bounding box of the ellipse
2212        * and incrementally update the value of Ax^2+Bxy*Cy^2; when this
2213        * value, q, is less than F, we're inside the ellipse
2214        */
2215       float tex_u = -0.5F + s[j] * texture->width0 * scaling;
2216       float tex_v = -0.5F + t[j] * texture->height0 * scaling;
2217
2218       int u0 = (int) floorf(tex_u - box_u);
2219       int u1 = (int) ceilf(tex_u + box_u);
2220       int v0 = (int) floorf(tex_v - box_v);
2221       int v1 = (int) ceilf(tex_v + box_v);
2222
2223       float num[4] = {0.0F, 0.0F, 0.0F, 0.0F};
2224       buffer_next = 0;
2225       den = 0;
2226       args.face_id = sp_sview->faces[j];
2227
2228       U = u0 - tex_u;
2229       for (v = v0; v <= v1; ++v) {
2230          float V = v - tex_v;
2231          float dq = A * (2 * U + 1) + B * V;
2232          float q = (C * V + B * U) * V + A * U * U;
2233
2234          int u;
2235          for (u = u0; u <= u1; ++u) {
2236             /* Note that the ellipse has been pre-scaled so F =
2237              * WEIGHT_LUT_SIZE - 1
2238              */
2239             if (q < WEIGHT_LUT_SIZE) {
2240                /* as a LUT is used, q must never be negative;
2241                 * should not happen, though
2242                 */
2243                const int qClamped = q >= 0.0F ? q : 0;
2244                float weight = weightLut[qClamped];
2245
2246                weight_buffer[buffer_next] = weight;
2247                s_buffer[buffer_next] = u / ((float) width);
2248                t_buffer[buffer_next] = v / ((float) height);
2249
2250                buffer_next++;
2251                if (buffer_next == TGSI_QUAD_SIZE) {
2252                   /* 4 texel coords are in the buffer -> read it now */
2253                   unsigned jj;
2254                   /* it is assumed that samp->min_img_filter is set to
2255                    * img_filter_2d_nearest or one of the
2256                    * accelerated img_filter_2d_nearest_XXX functions.
2257                    */
2258                   for (jj = 0; jj < buffer_next; jj++) {
2259                      args.s = s_buffer[jj];
2260                      args.t = t_buffer[jj];
2261                      args.p = p[jj];
2262                      min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][jj]);
2263                      num[0] += weight_buffer[jj] * rgba_temp[0][jj];
2264                      num[1] += weight_buffer[jj] * rgba_temp[1][jj];
2265                      num[2] += weight_buffer[jj] * rgba_temp[2][jj];
2266                      num[3] += weight_buffer[jj] * rgba_temp[3][jj];
2267                   }
2268
2269                   buffer_next = 0;
2270                }
2271
2272                den += weight;
2273             }
2274             q += dq;
2275             dq += ddq;
2276          }
2277       }
2278
2279       /* if the tex coord buffer contains unread values, we will read
2280        * them now.
2281        */
2282       if (buffer_next > 0) {
2283          unsigned jj;
2284          /* it is assumed that samp->min_img_filter is set to
2285           * img_filter_2d_nearest or one of the
2286           * accelerated img_filter_2d_nearest_XXX functions.
2287           */
2288          for (jj = 0; jj < buffer_next; jj++) {
2289             args.s = s_buffer[jj];
2290             args.t = t_buffer[jj];
2291             args.p = p[jj];
2292             min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][jj]);
2293             num[0] += weight_buffer[jj] * rgba_temp[0][jj];
2294             num[1] += weight_buffer[jj] * rgba_temp[1][jj];
2295             num[2] += weight_buffer[jj] * rgba_temp[2][jj];
2296             num[3] += weight_buffer[jj] * rgba_temp[3][jj];
2297          }
2298       }
2299
2300       if (den <= 0.0F) {
2301          /* Reaching this place would mean that no pixels intersected
2302           * the ellipse.  This should never happen because the filter
2303           * we use always intersects at least one pixel.
2304           */
2305
2306          /*rgba[0]=0;
2307          rgba[1]=0;
2308          rgba[2]=0;
2309          rgba[3]=0;*/
2310          /* not enough pixels in resampling, resort to direct interpolation */
2311          args.s = s[j];
2312          args.t = t[j];
2313          args.p = p[j];
2314          min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][j]);
2315          den = 1;
2316          num[0] = rgba_temp[0][j];
2317          num[1] = rgba_temp[1][j];
2318          num[2] = rgba_temp[2][j];
2319          num[3] = rgba_temp[3][j];
2320       }
2321
2322       rgba[0][j] = num[0] / den;
2323       rgba[1][j] = num[1] / den;
2324       rgba[2][j] = num[2] / den;
2325       rgba[3][j] = num[3] / den;
2326    }
2327 }
2328
2329
2330 /**
2331  * Sample 2D texture using an anisotropic filter.
2332  */
2333 static void
2334 mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
2335                         struct sp_sampler *sp_samp,
2336                         img_filter_func min_filter,
2337                         img_filter_func mag_filter,
2338                         const float s[TGSI_QUAD_SIZE],
2339                         const float t[TGSI_QUAD_SIZE],
2340                         const float p[TGSI_QUAD_SIZE],
2341                         const float c0[TGSI_QUAD_SIZE],
2342                         const float lod_in[TGSI_QUAD_SIZE],
2343                         const struct filter_args *filt_args,
2344                         float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2345 {
2346    const struct pipe_resource *texture = sp_sview->base.texture;
2347    const struct pipe_sampler_view *psview = &sp_sview->base;
2348    int level0;
2349    float lambda;
2350    float lod[TGSI_QUAD_SIZE];
2351
2352    float s_to_u = u_minify(texture->width0, psview->u.tex.first_level);
2353    float t_to_v = u_minify(texture->height0, psview->u.tex.first_level);
2354    float dudx = (s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
2355    float dudy = (s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]) * s_to_u;
2356    float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
2357    float dvdy = (t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]) * t_to_v;
2358    struct img_filter_args args;
2359
2360    if (filt_args->control == tgsi_sampler_lod_bias ||
2361        filt_args->control == tgsi_sampler_lod_none ||
2362        /* XXX FIXME */
2363        filt_args->control == tgsi_sampler_derivs_explicit) {
2364       /* note: instead of working with Px and Py, we will use the
2365        * squared length instead, to avoid sqrt.
2366        */
2367       float Px2 = dudx * dudx + dvdx * dvdx;
2368       float Py2 = dudy * dudy + dvdy * dvdy;
2369
2370       float Pmax2;
2371       float Pmin2;
2372       float e;
2373       const float maxEccentricity = sp_samp->base.max_anisotropy * sp_samp->base.max_anisotropy;
2374
2375       if (Px2 < Py2) {
2376          Pmax2 = Py2;
2377          Pmin2 = Px2;
2378       }
2379       else {
2380          Pmax2 = Px2;
2381          Pmin2 = Py2;
2382       }
2383
2384       /* if the eccentricity of the ellipse is too big, scale up the shorter
2385        * of the two vectors to limit the maximum amount of work per pixel
2386        */
2387       e = Pmax2 / Pmin2;
2388       if (e > maxEccentricity) {
2389          /* float s=e / maxEccentricity;
2390             minor[0] *= s;
2391             minor[1] *= s;
2392             Pmin2 *= s; */
2393          Pmin2 = Pmax2 / maxEccentricity;
2394       }
2395
2396       /* note: we need to have Pmin=sqrt(Pmin2) here, but we can avoid
2397        * this since 0.5*log(x) = log(sqrt(x))
2398        */
2399       lambda = 0.5F * util_fast_log2(Pmin2) + sp_samp->base.lod_bias;
2400       compute_lod(&sp_samp->base, filt_args->control, lambda, lod_in, lod);
2401    }
2402    else {
2403       assert(filt_args->control == tgsi_sampler_lod_explicit ||
2404              filt_args->control == tgsi_sampler_lod_zero);
2405       compute_lod(&sp_samp->base, filt_args->control, sp_samp->base.lod_bias, lod_in, lod);
2406    }
2407
2408    /* XXX: Take into account all lod values.
2409     */
2410    lambda = lod[0];
2411    level0 = psview->u.tex.first_level + (int)lambda;
2412
2413    /* If the ellipse covers the whole image, we can
2414     * simply return the average of the whole image.
2415     */
2416    if (level0 >= (int) psview->u.tex.last_level) {
2417       int j;
2418       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2419          args.s = s[j];
2420          args.t = t[j];
2421          args.p = p[j];
2422          args.level = psview->u.tex.last_level;
2423          args.face_id = sp_sview->faces[j];
2424          min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2425       }
2426    }
2427    else {
2428       /* don't bother interpolating between multiple LODs; it doesn't
2429        * seem to be worth the extra running time.
2430        */
2431       img_filter_2d_ewa(sp_sview, sp_samp, min_filter, mag_filter,
2432                         s, t, p, level0,
2433                         dudx, dvdx, dudy, dvdy, rgba);
2434    }
2435
2436    if (DEBUG_TEX) {
2437       print_sample_4(__FUNCTION__, rgba);
2438    }
2439 }
2440
2441
2442 /**
2443  * Specialized version of mip_filter_linear with hard-wired calls to
2444  * 2d lambda calculation and 2d_linear_repeat_POT img filters.
2445  */
2446 static void
2447 mip_filter_linear_2d_linear_repeat_POT(
2448    struct sp_sampler_view *sp_sview,
2449    struct sp_sampler *sp_samp,
2450    img_filter_func min_filter,
2451    img_filter_func mag_filter,
2452    const float s[TGSI_QUAD_SIZE],
2453    const float t[TGSI_QUAD_SIZE],
2454    const float p[TGSI_QUAD_SIZE],
2455    const float c0[TGSI_QUAD_SIZE],
2456    const float lod_in[TGSI_QUAD_SIZE],
2457    const struct filter_args *filt_args,
2458    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2459 {
2460    const struct pipe_sampler_view *psview = &sp_sview->base;
2461    int j;
2462    float lod[TGSI_QUAD_SIZE];
2463
2464    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
2465
2466    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2467       int level0 = psview->u.tex.first_level + (int)lod[j];
2468       struct img_filter_args args;
2469       /* Catches both negative and large values of level0:
2470        */
2471       args.s = s[j];
2472       args.t = t[j];
2473       args.p = p[j];
2474       args.face_id = sp_sview->faces[j];
2475       args.gather_only = filt_args->control == tgsi_sampler_gather;
2476       if ((unsigned)level0 >= psview->u.tex.last_level) {
2477          if (level0 < 0)
2478             args.level = psview->u.tex.first_level;
2479          else
2480             args.level = psview->u.tex.last_level;
2481          img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args,
2482                                          &rgba[0][j]);
2483
2484       }
2485       else {
2486          float levelBlend = frac(lod[j]);
2487          float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2488          int c;
2489
2490          args.level = level0;
2491          img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args, &rgbax[0][0]);
2492          args.level = level0+1;
2493          img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args, &rgbax[0][1]);
2494
2495          for (c = 0; c < TGSI_NUM_CHANNELS; c++)
2496             rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
2497       }
2498    }
2499
2500    if (DEBUG_TEX) {
2501       print_sample_4(__FUNCTION__, rgba);
2502    }
2503 }
2504
2505
2506 /**
2507  * Do shadow/depth comparisons.
2508  */
2509 static void
2510 sample_compare(struct sp_sampler_view *sp_sview,
2511                struct sp_sampler *sp_samp,
2512                const float s[TGSI_QUAD_SIZE],
2513                const float t[TGSI_QUAD_SIZE],
2514                const float p[TGSI_QUAD_SIZE],
2515                const float c0[TGSI_QUAD_SIZE],
2516                const float c1[TGSI_QUAD_SIZE],
2517                enum tgsi_sampler_control control,
2518                float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2519 {
2520    const struct pipe_sampler_state *sampler = &sp_samp->base;
2521    int j, v;
2522    int k[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2523    float pc[4];
2524    const struct util_format_description *format_desc;
2525    unsigned chan_type;
2526    bool is_gather = (control == tgsi_sampler_gather);
2527
2528    /**
2529     * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
2530     * for 2D Array texture we need to use the 'c0' (aka Q).
2531     * When we sampled the depth texture, the depth value was put into all
2532     * RGBA channels.  We look at the red channel here.
2533     */
2534
2535    if (sp_sview->base.target == PIPE_TEXTURE_2D_ARRAY ||
2536        sp_sview->base.target == PIPE_TEXTURE_CUBE) {
2537       pc[0] = c0[0];
2538       pc[1] = c0[1];
2539       pc[2] = c0[2];
2540       pc[3] = c0[3];
2541    } else if (sp_sview->base.target == PIPE_TEXTURE_CUBE_ARRAY) {
2542       pc[0] = c1[0];
2543       pc[1] = c1[1];
2544       pc[2] = c1[2];
2545       pc[3] = c1[3];
2546    } else {
2547       pc[0] = p[0];
2548       pc[1] = p[1];
2549       pc[2] = p[2];
2550       pc[3] = p[3];
2551    }
2552
2553    format_desc = util_format_description(sp_sview->base.format);
2554    /* not entirely sure we couldn't end up with non-valid swizzle here */
2555    chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
2556                   format_desc->channel[format_desc->swizzle[0]].type :
2557                   UTIL_FORMAT_TYPE_FLOAT;
2558    if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
2559       /*
2560        * clamping is a result of conversion to texture format, hence
2561        * doesn't happen with floats. Technically also should do comparison
2562        * in texture format (quantization!).
2563        */
2564       pc[0] = CLAMP(pc[0], 0.0F, 1.0F);
2565       pc[1] = CLAMP(pc[1], 0.0F, 1.0F);
2566       pc[2] = CLAMP(pc[2], 0.0F, 1.0F);
2567       pc[3] = CLAMP(pc[3], 0.0F, 1.0F);
2568    }
2569
2570    for (v = 0; v < (is_gather ? TGSI_NUM_CHANNELS : 1); v++) {
2571       /* compare four texcoords vs. four texture samples */
2572       switch (sampler->compare_func) {
2573       case PIPE_FUNC_LESS:
2574          k[v][0] = pc[0] < rgba[v][0];
2575          k[v][1] = pc[1] < rgba[v][1];
2576          k[v][2] = pc[2] < rgba[v][2];
2577          k[v][3] = pc[3] < rgba[v][3];
2578          break;
2579       case PIPE_FUNC_LEQUAL:
2580          k[v][0] = pc[0] <= rgba[v][0];
2581          k[v][1] = pc[1] <= rgba[v][1];
2582          k[v][2] = pc[2] <= rgba[v][2];
2583          k[v][3] = pc[3] <= rgba[v][3];
2584          break;
2585       case PIPE_FUNC_GREATER:
2586          k[v][0] = pc[0] > rgba[v][0];
2587          k[v][1] = pc[1] > rgba[v][1];
2588          k[v][2] = pc[2] > rgba[v][2];
2589          k[v][3] = pc[3] > rgba[v][3];
2590          break;
2591       case PIPE_FUNC_GEQUAL:
2592          k[v][0] = pc[0] >= rgba[v][0];
2593          k[v][1] = pc[1] >= rgba[v][1];
2594          k[v][2] = pc[2] >= rgba[v][2];
2595          k[v][3] = pc[3] >= rgba[v][3];
2596          break;
2597       case PIPE_FUNC_EQUAL:
2598          k[v][0] = pc[0] == rgba[v][0];
2599          k[v][1] = pc[1] == rgba[v][1];
2600          k[v][2] = pc[2] == rgba[v][2];
2601          k[v][3] = pc[3] == rgba[v][3];
2602          break;
2603       case PIPE_FUNC_NOTEQUAL:
2604          k[v][0] = pc[0] != rgba[v][0];
2605          k[v][1] = pc[1] != rgba[v][1];
2606          k[v][2] = pc[2] != rgba[v][2];
2607          k[v][3] = pc[3] != rgba[v][3];
2608          break;
2609       case PIPE_FUNC_ALWAYS:
2610          k[v][0] = k[v][1] = k[v][2] = k[v][3] = 1;
2611          break;
2612       case PIPE_FUNC_NEVER:
2613          k[v][0] = k[v][1] = k[v][2] = k[v][3] = 0;
2614          break;
2615       default:
2616          k[v][0] = k[v][1] = k[v][2] = k[v][3] = 0;
2617          assert(0);
2618          break;
2619       }
2620    }
2621
2622    if (is_gather) {
2623       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2624          for (v = 0; v < TGSI_NUM_CHANNELS; v++) {
2625             rgba[v][j] = k[v][j];
2626          }
2627       }
2628    } else {
2629       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2630          rgba[0][j] = k[0][j];
2631          rgba[1][j] = k[0][j];
2632          rgba[2][j] = k[0][j];
2633          rgba[3][j] = 1.0F;
2634       }
2635    }
2636 }
2637
2638 static void
2639 do_swizzling(const struct pipe_sampler_view *sview,
2640              float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
2641              float out[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2642 {
2643    int j;
2644    const unsigned swizzle_r = sview->swizzle_r;
2645    const unsigned swizzle_g = sview->swizzle_g;
2646    const unsigned swizzle_b = sview->swizzle_b;
2647    const unsigned swizzle_a = sview->swizzle_a;
2648
2649    switch (swizzle_r) {
2650    case PIPE_SWIZZLE_ZERO:
2651       for (j = 0; j < 4; j++)
2652          out[0][j] = 0.0f;
2653       break;
2654    case PIPE_SWIZZLE_ONE:
2655       for (j = 0; j < 4; j++)
2656          out[0][j] = 1.0f;
2657       break;
2658    default:
2659       assert(swizzle_r < 4);
2660       for (j = 0; j < 4; j++)
2661          out[0][j] = in[swizzle_r][j];
2662    }
2663
2664    switch (swizzle_g) {
2665    case PIPE_SWIZZLE_ZERO:
2666       for (j = 0; j < 4; j++)
2667          out[1][j] = 0.0f;
2668       break;
2669    case PIPE_SWIZZLE_ONE:
2670       for (j = 0; j < 4; j++)
2671          out[1][j] = 1.0f;
2672       break;
2673    default:
2674       assert(swizzle_g < 4);
2675       for (j = 0; j < 4; j++)
2676          out[1][j] = in[swizzle_g][j];
2677    }
2678
2679    switch (swizzle_b) {
2680    case PIPE_SWIZZLE_ZERO:
2681       for (j = 0; j < 4; j++)
2682          out[2][j] = 0.0f;
2683       break;
2684    case PIPE_SWIZZLE_ONE:
2685       for (j = 0; j < 4; j++)
2686          out[2][j] = 1.0f;
2687       break;
2688    default:
2689       assert(swizzle_b < 4);
2690       for (j = 0; j < 4; j++)
2691          out[2][j] = in[swizzle_b][j];
2692    }
2693
2694    switch (swizzle_a) {
2695    case PIPE_SWIZZLE_ZERO:
2696       for (j = 0; j < 4; j++)
2697          out[3][j] = 0.0f;
2698       break;
2699    case PIPE_SWIZZLE_ONE:
2700       for (j = 0; j < 4; j++)
2701          out[3][j] = 1.0f;
2702       break;
2703    default:
2704       assert(swizzle_a < 4);
2705       for (j = 0; j < 4; j++)
2706          out[3][j] = in[swizzle_a][j];
2707    }
2708 }
2709
2710
2711 static wrap_nearest_func
2712 get_nearest_unorm_wrap(unsigned mode)
2713 {
2714    switch (mode) {
2715    case PIPE_TEX_WRAP_CLAMP:
2716       return wrap_nearest_unorm_clamp;
2717    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2718       return wrap_nearest_unorm_clamp_to_edge;
2719    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2720       return wrap_nearest_unorm_clamp_to_border;
2721    default:
2722       debug_printf("illegal wrap mode %d with non-normalized coords\n", mode);
2723       return wrap_nearest_unorm_clamp;
2724    }
2725 }
2726
2727
2728 static wrap_nearest_func
2729 get_nearest_wrap(unsigned mode)
2730 {
2731    switch (mode) {
2732    case PIPE_TEX_WRAP_REPEAT:
2733       return wrap_nearest_repeat;
2734    case PIPE_TEX_WRAP_CLAMP:
2735       return wrap_nearest_clamp;
2736    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2737       return wrap_nearest_clamp_to_edge;
2738    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2739       return wrap_nearest_clamp_to_border;
2740    case PIPE_TEX_WRAP_MIRROR_REPEAT:
2741       return wrap_nearest_mirror_repeat;
2742    case PIPE_TEX_WRAP_MIRROR_CLAMP:
2743       return wrap_nearest_mirror_clamp;
2744    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
2745       return wrap_nearest_mirror_clamp_to_edge;
2746    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2747       return wrap_nearest_mirror_clamp_to_border;
2748    default:
2749       assert(0);
2750       return wrap_nearest_repeat;
2751    }
2752 }
2753
2754
2755 static wrap_linear_func
2756 get_linear_unorm_wrap(unsigned mode)
2757 {
2758    switch (mode) {
2759    case PIPE_TEX_WRAP_CLAMP:
2760       return wrap_linear_unorm_clamp;
2761    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2762       return wrap_linear_unorm_clamp_to_edge;
2763    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2764       return wrap_linear_unorm_clamp_to_border;
2765    default:
2766       debug_printf("illegal wrap mode %d with non-normalized coords\n", mode);
2767       return wrap_linear_unorm_clamp;
2768    }
2769 }
2770
2771
2772 static wrap_linear_func
2773 get_linear_wrap(unsigned mode)
2774 {
2775    switch (mode) {
2776    case PIPE_TEX_WRAP_REPEAT:
2777       return wrap_linear_repeat;
2778    case PIPE_TEX_WRAP_CLAMP:
2779       return wrap_linear_clamp;
2780    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2781       return wrap_linear_clamp_to_edge;
2782    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2783       return wrap_linear_clamp_to_border;
2784    case PIPE_TEX_WRAP_MIRROR_REPEAT:
2785       return wrap_linear_mirror_repeat;
2786    case PIPE_TEX_WRAP_MIRROR_CLAMP:
2787       return wrap_linear_mirror_clamp;
2788    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
2789       return wrap_linear_mirror_clamp_to_edge;
2790    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2791       return wrap_linear_mirror_clamp_to_border;
2792    default:
2793       assert(0);
2794       return wrap_linear_repeat;
2795    }
2796 }
2797
2798
2799 /**
2800  * Is swizzling needed for the given state key?
2801  */
2802 static INLINE bool
2803 any_swizzle(const struct pipe_sampler_view *view)
2804 {
2805    return (view->swizzle_r != PIPE_SWIZZLE_RED ||
2806            view->swizzle_g != PIPE_SWIZZLE_GREEN ||
2807            view->swizzle_b != PIPE_SWIZZLE_BLUE ||
2808            view->swizzle_a != PIPE_SWIZZLE_ALPHA);
2809 }
2810
2811
2812 static img_filter_func
2813 get_img_filter(const struct sp_sampler_view *sp_sview,
2814                const struct pipe_sampler_state *sampler,
2815                unsigned filter, bool gather)
2816 {
2817    switch (sp_sview->base.target) {
2818    case PIPE_BUFFER:
2819    case PIPE_TEXTURE_1D:
2820       if (filter == PIPE_TEX_FILTER_NEAREST)
2821          return img_filter_1d_nearest;
2822       else
2823          return img_filter_1d_linear;
2824       break;
2825    case PIPE_TEXTURE_1D_ARRAY:
2826       if (filter == PIPE_TEX_FILTER_NEAREST)
2827          return img_filter_1d_array_nearest;
2828       else
2829          return img_filter_1d_array_linear;
2830       break;
2831    case PIPE_TEXTURE_2D:
2832    case PIPE_TEXTURE_RECT:
2833       /* Try for fast path:
2834        */
2835       if (!gather && sp_sview->pot2d &&
2836           sampler->wrap_s == sampler->wrap_t &&
2837           sampler->normalized_coords)
2838       {
2839          switch (sampler->wrap_s) {
2840          case PIPE_TEX_WRAP_REPEAT:
2841             switch (filter) {
2842             case PIPE_TEX_FILTER_NEAREST:
2843                return img_filter_2d_nearest_repeat_POT;
2844             case PIPE_TEX_FILTER_LINEAR:
2845                return img_filter_2d_linear_repeat_POT;
2846             default:
2847                break;
2848             }
2849             break;
2850          case PIPE_TEX_WRAP_CLAMP:
2851             switch (filter) {
2852             case PIPE_TEX_FILTER_NEAREST:
2853                return img_filter_2d_nearest_clamp_POT;
2854             default:
2855                break;
2856             }
2857          }
2858       }
2859       /* Otherwise use default versions:
2860        */
2861       if (filter == PIPE_TEX_FILTER_NEAREST)
2862          return img_filter_2d_nearest;
2863       else
2864          return img_filter_2d_linear;
2865       break;
2866    case PIPE_TEXTURE_2D_ARRAY:
2867       if (filter == PIPE_TEX_FILTER_NEAREST)
2868          return img_filter_2d_array_nearest;
2869       else
2870          return img_filter_2d_array_linear;
2871       break;
2872    case PIPE_TEXTURE_CUBE:
2873       if (filter == PIPE_TEX_FILTER_NEAREST)
2874          return img_filter_cube_nearest;
2875       else
2876          return img_filter_cube_linear;
2877       break;
2878    case PIPE_TEXTURE_CUBE_ARRAY:
2879       if (filter == PIPE_TEX_FILTER_NEAREST)
2880          return img_filter_cube_array_nearest;
2881       else
2882          return img_filter_cube_array_linear;
2883       break;
2884    case PIPE_TEXTURE_3D:
2885       if (filter == PIPE_TEX_FILTER_NEAREST)
2886          return img_filter_3d_nearest;
2887       else
2888          return img_filter_3d_linear;
2889       break;
2890    default:
2891       assert(0);
2892       return img_filter_1d_nearest;
2893    }
2894 }
2895
2896
2897 static void
2898 sample_mip(struct sp_sampler_view *sp_sview,
2899            struct sp_sampler *sp_samp,
2900            const float s[TGSI_QUAD_SIZE],
2901            const float t[TGSI_QUAD_SIZE],
2902            const float p[TGSI_QUAD_SIZE],
2903            const float c0[TGSI_QUAD_SIZE],
2904            const float lod[TGSI_QUAD_SIZE],
2905            const struct filter_args *filt_args,
2906            float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2907 {
2908    mip_filter_func mip_filter;
2909    img_filter_func min_img_filter = NULL;
2910    img_filter_func mag_img_filter = NULL;
2911
2912    if (filt_args->control == tgsi_sampler_gather) {
2913       mip_filter = mip_filter_nearest;
2914       min_img_filter = get_img_filter(sp_sview, &sp_samp->base, PIPE_TEX_FILTER_LINEAR, true);
2915    } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
2916       mip_filter = mip_filter_linear_2d_linear_repeat_POT;
2917    }
2918    else {
2919       mip_filter = sp_samp->mip_filter;
2920       min_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->min_img_filter, false);
2921       if (sp_samp->min_mag_equal) {
2922          mag_img_filter = min_img_filter;
2923       }
2924       else {
2925          mag_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->base.mag_img_filter, false);
2926       }
2927    }
2928
2929    mip_filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
2930               s, t, p, c0, lod, filt_args, rgba);
2931
2932    if (sp_samp->base.compare_mode != PIPE_TEX_COMPARE_NONE) {
2933       sample_compare(sp_sview, sp_samp, s, t, p, c0, lod, filt_args->control, rgba);
2934    }
2935
2936    if (sp_sview->need_swizzle && filt_args->control != tgsi_sampler_gather) {
2937       float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2938       memcpy(rgba_temp, rgba, sizeof(rgba_temp));
2939       do_swizzling(&sp_sview->base, rgba_temp, rgba);
2940    }
2941
2942 }
2943
2944
2945 /**
2946  * Use 3D texcoords to choose a cube face, then sample the 2D cube faces.
2947  * Put face info into the sampler faces[] array.
2948  */
2949 static void
2950 sample_cube(struct sp_sampler_view *sp_sview,
2951             struct sp_sampler *sp_samp,
2952             const float s[TGSI_QUAD_SIZE],
2953             const float t[TGSI_QUAD_SIZE],
2954             const float p[TGSI_QUAD_SIZE],
2955             const float c0[TGSI_QUAD_SIZE],
2956             const float c1[TGSI_QUAD_SIZE],
2957             const struct filter_args *filt_args,
2958             float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2959 {
2960    unsigned j;
2961    float ssss[4], tttt[4];
2962
2963    /* Not actually used, but the intermediate steps that do the
2964     * dereferencing don't know it.
2965     */
2966    static float pppp[4] = { 0, 0, 0, 0 };
2967
2968    pppp[0] = c0[0];
2969    pppp[1] = c0[1];
2970    pppp[2] = c0[2];
2971    pppp[3] = c0[3];
2972    /*
2973      major axis
2974      direction    target                             sc     tc    ma
2975      ----------   -------------------------------    ---    ---   ---
2976      +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
2977      -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
2978      +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
2979      -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
2980      +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
2981      -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
2982    */
2983
2984    /* Choose the cube face and compute new s/t coords for the 2D face.
2985     *
2986     * Use the same cube face for all four pixels in the quad.
2987     *
2988     * This isn't ideal, but if we want to use a different cube face
2989     * per pixel in the quad, we'd have to also compute the per-face
2990     * LOD here too.  That's because the four post-face-selection
2991     * texcoords are no longer related to each other (they're
2992     * per-face!)  so we can't use subtraction to compute the partial
2993     * deriviates to compute the LOD.  Doing so (near cube edges
2994     * anyway) gives us pretty much random values.
2995     */
2996    {
2997       /* use the average of the four pixel's texcoords to choose the face */
2998       const float rx = 0.25F * (s[0] + s[1] + s[2] + s[3]);
2999       const float ry = 0.25F * (t[0] + t[1] + t[2] + t[3]);
3000       const float rz = 0.25F * (p[0] + p[1] + p[2] + p[3]);
3001       const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
3002
3003       if (arx >= ary && arx >= arz) {
3004          float sign = (rx >= 0.0F) ? 1.0F : -1.0F;
3005          uint face = (rx >= 0.0F) ? PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X;
3006          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3007             const float ima = -0.5F / fabsf(s[j]);
3008             ssss[j] = sign *  p[j] * ima + 0.5F;
3009             tttt[j] =         t[j] * ima + 0.5F;
3010             sp_sview->faces[j] = face;
3011          }
3012       }
3013       else if (ary >= arx && ary >= arz) {
3014          float sign = (ry >= 0.0F) ? 1.0F : -1.0F;
3015          uint face = (ry >= 0.0F) ? PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y;
3016          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3017             const float ima = -0.5F / fabsf(t[j]);
3018             ssss[j] =        -s[j] * ima + 0.5F;
3019             tttt[j] = sign * -p[j] * ima + 0.5F;
3020             sp_sview->faces[j] = face;
3021          }
3022       }
3023       else {
3024          float sign = (rz >= 0.0F) ? 1.0F : -1.0F;
3025          uint face = (rz >= 0.0F) ? PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z;
3026          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3027             const float ima = -0.5F / fabsf(p[j]);
3028             ssss[j] = sign * -s[j] * ima + 0.5F;
3029             tttt[j] =         t[j] * ima + 0.5F;
3030             sp_sview->faces[j] = face;
3031          }
3032       }
3033    }
3034
3035    sample_mip(sp_sview, sp_samp, ssss, tttt, pppp, c0, c1, filt_args, rgba);
3036 }
3037
3038
3039 static void
3040 sp_get_dims(struct sp_sampler_view *sp_sview, int level,
3041             int dims[4])
3042 {
3043    const struct pipe_sampler_view *view = &sp_sview->base;
3044    const struct pipe_resource *texture = view->texture;
3045
3046    if (view->target == PIPE_BUFFER) {
3047       dims[0] = (view->u.buf.last_element - view->u.buf.first_element) + 1;
3048       /* the other values are undefined, but let's avoid potential valgrind
3049        * warnings.
3050        */
3051       dims[1] = dims[2] = dims[3] = 0;
3052       return;
3053    }
3054
3055    /* undefined according to EXT_gpu_program */
3056    level += view->u.tex.first_level;
3057    if (level > view->u.tex.last_level)
3058       return;
3059
3060    dims[3] = view->u.tex.last_level - view->u.tex.first_level + 1;
3061    dims[0] = u_minify(texture->width0, level);
3062
3063    switch (view->target) {
3064    case PIPE_TEXTURE_1D_ARRAY:
3065       dims[1] = view->u.tex.last_layer - view->u.tex.first_layer + 1;
3066       /* fallthrough */
3067    case PIPE_TEXTURE_1D:
3068       return;
3069    case PIPE_TEXTURE_2D_ARRAY:
3070       dims[2] = view->u.tex.last_layer - view->u.tex.first_layer + 1;
3071       /* fallthrough */
3072    case PIPE_TEXTURE_2D:
3073    case PIPE_TEXTURE_CUBE:
3074    case PIPE_TEXTURE_RECT:
3075       dims[1] = u_minify(texture->height0, level);
3076       return;
3077    case PIPE_TEXTURE_3D:
3078       dims[1] = u_minify(texture->height0, level);
3079       dims[2] = u_minify(texture->depth0, level);
3080       return;
3081    case PIPE_TEXTURE_CUBE_ARRAY:
3082       dims[1] = u_minify(texture->height0, level);
3083       dims[2] = (view->u.tex.last_layer - view->u.tex.first_layer + 1) / 6;
3084       break;
3085    default:
3086       assert(!"unexpected texture target in sp_get_dims()");
3087       return;
3088    }
3089 }
3090
3091 /**
3092  * This function is only used for getting unfiltered texels via the
3093  * TXF opcode.  The GL spec says that out-of-bounds texel fetches
3094  * produce undefined results.  Instead of crashing, lets just clamp
3095  * coords to the texture image size.
3096  */
3097 static void
3098 sp_get_texels(struct sp_sampler_view *sp_sview,
3099               const int v_i[TGSI_QUAD_SIZE],
3100               const int v_j[TGSI_QUAD_SIZE],
3101               const int v_k[TGSI_QUAD_SIZE],
3102               const int lod[TGSI_QUAD_SIZE],
3103               const int8_t offset[3],
3104               float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
3105 {
3106    union tex_tile_address addr;
3107    const struct pipe_resource *texture = sp_sview->base.texture;
3108    int j, c;
3109    const float *tx;
3110    int width, height, depth;
3111
3112    addr.value = 0;
3113    /* TODO write a better test for LOD */
3114    addr.bits.level = sp_sview->base.target == PIPE_BUFFER ? 0 :
3115                         CLAMP(lod[0] + sp_sview->base.u.tex.first_level,
3116                               sp_sview->base.u.tex.first_level,
3117                               sp_sview->base.u.tex.last_level);
3118
3119    width = u_minify(texture->width0, addr.bits.level);
3120    height = u_minify(texture->height0, addr.bits.level);
3121    depth = u_minify(texture->depth0, addr.bits.level);
3122
3123    switch (sp_sview->base.target) {
3124    case PIPE_BUFFER:
3125    case PIPE_TEXTURE_1D:
3126       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3127          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3128          tx = get_texel_2d_no_border(sp_sview, addr, x, 0);
3129          for (c = 0; c < 4; c++) {
3130             rgba[c][j] = tx[c];
3131          }
3132       }
3133       break;
3134    case PIPE_TEXTURE_1D_ARRAY:
3135       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3136          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3137          int y = CLAMP(v_j[j], sp_sview->base.u.tex.first_layer,
3138                        sp_sview->base.u.tex.last_layer);
3139          tx = get_texel_2d_no_border(sp_sview, addr, x, y);
3140          for (c = 0; c < 4; c++) {
3141             rgba[c][j] = tx[c];
3142          }
3143       }
3144       break;
3145    case PIPE_TEXTURE_2D:
3146    case PIPE_TEXTURE_RECT:
3147       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3148          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3149          int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
3150          tx = get_texel_2d_no_border(sp_sview, addr, x, y);
3151          for (c = 0; c < 4; c++) {
3152             rgba[c][j] = tx[c];
3153          }
3154       }
3155       break;
3156    case PIPE_TEXTURE_2D_ARRAY:
3157       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3158          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3159          int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
3160          int layer = CLAMP(v_k[j], sp_sview->base.u.tex.first_layer,
3161                            sp_sview->base.u.tex.last_layer);
3162          tx = get_texel_3d_no_border(sp_sview, addr, x, y, layer);
3163          for (c = 0; c < 4; c++) {
3164             rgba[c][j] = tx[c];
3165          }
3166       }
3167       break;
3168    case PIPE_TEXTURE_3D:
3169       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3170          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3171          int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
3172          int z = CLAMP(v_k[j] + offset[2], 0, depth - 1);
3173          tx = get_texel_3d_no_border(sp_sview, addr, x, y, z);
3174          for (c = 0; c < 4; c++) {
3175             rgba[c][j] = tx[c];
3176          }
3177       }
3178       break;
3179    case PIPE_TEXTURE_CUBE: /* TXF can't work on CUBE according to spec */
3180    default:
3181       assert(!"Unknown or CUBE texture type in TXF processing\n");
3182       break;
3183    }
3184
3185    if (sp_sview->need_swizzle) {
3186       float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3187       memcpy(rgba_temp, rgba, sizeof(rgba_temp));
3188       do_swizzling(&sp_sview->base, rgba_temp, rgba);
3189    }
3190 }
3191
3192
3193 void *
3194 softpipe_create_sampler_state(struct pipe_context *pipe,
3195                               const struct pipe_sampler_state *sampler)
3196 {
3197    struct sp_sampler *samp = CALLOC_STRUCT(sp_sampler);
3198
3199    samp->base = *sampler;
3200
3201    /* Note that (for instance) linear_texcoord_s and
3202     * nearest_texcoord_s may be active at the same time, if the
3203     * sampler min_img_filter differs from its mag_img_filter.
3204     */
3205    if (sampler->normalized_coords) {
3206       samp->linear_texcoord_s = get_linear_wrap( sampler->wrap_s );
3207       samp->linear_texcoord_t = get_linear_wrap( sampler->wrap_t );
3208       samp->linear_texcoord_p = get_linear_wrap( sampler->wrap_r );
3209
3210       samp->nearest_texcoord_s = get_nearest_wrap( sampler->wrap_s );
3211       samp->nearest_texcoord_t = get_nearest_wrap( sampler->wrap_t );
3212       samp->nearest_texcoord_p = get_nearest_wrap( sampler->wrap_r );
3213    }
3214    else {
3215       samp->linear_texcoord_s = get_linear_unorm_wrap( sampler->wrap_s );
3216       samp->linear_texcoord_t = get_linear_unorm_wrap( sampler->wrap_t );
3217       samp->linear_texcoord_p = get_linear_unorm_wrap( sampler->wrap_r );
3218
3219       samp->nearest_texcoord_s = get_nearest_unorm_wrap( sampler->wrap_s );
3220       samp->nearest_texcoord_t = get_nearest_unorm_wrap( sampler->wrap_t );
3221       samp->nearest_texcoord_p = get_nearest_unorm_wrap( sampler->wrap_r );
3222    }
3223
3224    samp->min_img_filter = sampler->min_img_filter;
3225
3226    switch (sampler->min_mip_filter) {
3227    case PIPE_TEX_MIPFILTER_NONE:
3228       if (sampler->min_img_filter == sampler->mag_img_filter)
3229          samp->mip_filter = mip_filter_none_no_filter_select;
3230       else
3231          samp->mip_filter = mip_filter_none;
3232       break;
3233
3234    case PIPE_TEX_MIPFILTER_NEAREST:
3235       samp->mip_filter = mip_filter_nearest;
3236       break;
3237
3238    case PIPE_TEX_MIPFILTER_LINEAR:
3239       if (sampler->min_img_filter == sampler->mag_img_filter &&
3240           sampler->normalized_coords &&
3241           sampler->wrap_s == PIPE_TEX_WRAP_REPEAT &&
3242           sampler->wrap_t == PIPE_TEX_WRAP_REPEAT &&
3243           sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
3244           sampler->max_anisotropy <= 1) {
3245          samp->min_mag_equal_repeat_linear = TRUE;
3246       }
3247       samp->mip_filter = mip_filter_linear;
3248
3249       /* Anisotropic filtering extension. */
3250       if (sampler->max_anisotropy > 1) {
3251          samp->mip_filter = mip_filter_linear_aniso;
3252
3253          /* Override min_img_filter:
3254           * min_img_filter needs to be set to NEAREST since we need to access
3255           * each texture pixel as it is and weight it later; using linear
3256           * filters will have incorrect results.
3257           * By setting the filter to NEAREST here, we can avoid calling the
3258           * generic img_filter_2d_nearest in the anisotropic filter function,
3259           * making it possible to use one of the accelerated implementations
3260           */
3261          samp->min_img_filter = PIPE_TEX_FILTER_NEAREST;
3262
3263          /* on first access create the lookup table containing the filter weights. */
3264         if (!weightLut) {
3265            create_filter_table();
3266         }
3267       }
3268       break;
3269    }
3270    if (samp->min_img_filter == sampler->mag_img_filter) {
3271       samp->min_mag_equal = TRUE;
3272    }
3273
3274    return (void *)samp;
3275 }
3276
3277
3278 compute_lambda_func
3279 softpipe_get_lambda_func(const struct pipe_sampler_view *view, unsigned shader)
3280 {
3281    if (shader != PIPE_SHADER_FRAGMENT)
3282       return compute_lambda_vert;
3283
3284    switch (view->target) {
3285    case PIPE_BUFFER:
3286    case PIPE_TEXTURE_1D:
3287    case PIPE_TEXTURE_1D_ARRAY:
3288       return compute_lambda_1d;
3289    case PIPE_TEXTURE_2D:
3290    case PIPE_TEXTURE_2D_ARRAY:
3291    case PIPE_TEXTURE_RECT:
3292    case PIPE_TEXTURE_CUBE:
3293    case PIPE_TEXTURE_CUBE_ARRAY:
3294       return compute_lambda_2d;
3295    case PIPE_TEXTURE_3D:
3296       return compute_lambda_3d;
3297    default:
3298       assert(0);
3299       return compute_lambda_1d;
3300    }
3301 }
3302
3303
3304 struct pipe_sampler_view *
3305 softpipe_create_sampler_view(struct pipe_context *pipe,
3306                              struct pipe_resource *resource,
3307                              const struct pipe_sampler_view *templ)
3308 {
3309    struct sp_sampler_view *sview = CALLOC_STRUCT(sp_sampler_view);
3310    struct softpipe_resource *spr = (struct softpipe_resource *)resource;
3311
3312    if (sview) {
3313       struct pipe_sampler_view *view = &sview->base;
3314       *view = *templ;
3315       view->reference.count = 1;
3316       view->texture = NULL;
3317       pipe_resource_reference(&view->texture, resource);
3318       view->context = pipe;
3319
3320 #ifdef DEBUG
3321      /*
3322       * This is possibly too lenient, but the primary reason is just
3323       * to catch state trackers which forget to initialize this, so
3324       * it only catches clearly impossible view targets.
3325       */
3326       if (view->target != resource->target) {
3327          if (view->target == PIPE_TEXTURE_1D)
3328             assert(resource->target == PIPE_TEXTURE_1D_ARRAY);
3329          else if (view->target == PIPE_TEXTURE_1D_ARRAY)
3330             assert(resource->target == PIPE_TEXTURE_1D);
3331          else if (view->target == PIPE_TEXTURE_2D)
3332             assert(resource->target == PIPE_TEXTURE_2D_ARRAY ||
3333                    resource->target == PIPE_TEXTURE_CUBE ||
3334                    resource->target == PIPE_TEXTURE_CUBE_ARRAY);
3335          else if (view->target == PIPE_TEXTURE_2D_ARRAY)
3336             assert(resource->target == PIPE_TEXTURE_2D ||
3337                    resource->target == PIPE_TEXTURE_CUBE ||
3338                    resource->target == PIPE_TEXTURE_CUBE_ARRAY);
3339          else if (view->target == PIPE_TEXTURE_CUBE)
3340             assert(resource->target == PIPE_TEXTURE_CUBE_ARRAY ||
3341                    resource->target == PIPE_TEXTURE_2D_ARRAY);
3342          else if (view->target == PIPE_TEXTURE_CUBE_ARRAY)
3343             assert(resource->target == PIPE_TEXTURE_CUBE ||
3344                    resource->target == PIPE_TEXTURE_2D_ARRAY);
3345          else
3346             assert(0);
3347       }
3348 #endif
3349
3350       if (any_swizzle(view)) {
3351          sview->need_swizzle = TRUE;
3352       }
3353
3354       if (view->target == PIPE_TEXTURE_CUBE ||
3355           view->target == PIPE_TEXTURE_CUBE_ARRAY)
3356          sview->get_samples = sample_cube;
3357       else {
3358          sview->get_samples = sample_mip;
3359       }
3360       sview->pot2d = spr->pot &&
3361                      (view->target == PIPE_TEXTURE_2D ||
3362                       view->target == PIPE_TEXTURE_RECT);
3363
3364       sview->xpot = util_logbase2( resource->width0 );
3365       sview->ypot = util_logbase2( resource->height0 );
3366    }
3367
3368    return (struct pipe_sampler_view *) sview;
3369 }
3370
3371
3372 static void
3373 sp_tgsi_get_dims(struct tgsi_sampler *tgsi_sampler,
3374                  const unsigned sview_index,
3375                  int level, int dims[4])
3376 {
3377    struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
3378
3379    assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
3380    /* always have a view here but texture is NULL if no sampler view was set. */
3381    if (!sp_samp->sp_sview[sview_index].base.texture) {
3382       dims[0] = dims[1] = dims[2] = dims[3] = 0;
3383       return;
3384    }
3385    sp_get_dims(&sp_samp->sp_sview[sview_index], level, dims);
3386 }
3387
3388
3389 static void
3390 sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
3391                     const unsigned sview_index,
3392                     const unsigned sampler_index,
3393                     const float s[TGSI_QUAD_SIZE],
3394                     const float t[TGSI_QUAD_SIZE],
3395                     const float p[TGSI_QUAD_SIZE],
3396                     const float c0[TGSI_QUAD_SIZE],
3397                     const float lod[TGSI_QUAD_SIZE],
3398                     float derivs[3][2][TGSI_QUAD_SIZE],
3399                     const int8_t offset[3],
3400                     enum tgsi_sampler_control control,
3401                     float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
3402 {
3403    struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
3404    struct filter_args filt_args;
3405    assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
3406    assert(sampler_index < PIPE_MAX_SAMPLERS);
3407    assert(sp_samp->sp_sampler[sampler_index]);
3408    /* always have a view here but texture is NULL if no sampler view was set. */
3409    if (!sp_samp->sp_sview[sview_index].base.texture) {
3410       int i, j;
3411       for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3412          for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3413             rgba[j][i] = 0.0f;
3414          }
3415       }
3416       return;
3417    }
3418
3419    filt_args.control = control;
3420    filt_args.offset = offset;
3421    sp_samp->sp_sview[sview_index].get_samples(&sp_samp->sp_sview[sview_index],
3422                                               sp_samp->sp_sampler[sampler_index],
3423                                               s, t, p, c0, lod, &filt_args, rgba);
3424 }
3425
3426
3427 static void
3428 sp_tgsi_get_texel(struct tgsi_sampler *tgsi_sampler,
3429                   const unsigned sview_index,
3430                   const int i[TGSI_QUAD_SIZE],
3431                   const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],
3432                   const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
3433                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
3434 {
3435    struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
3436
3437    assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
3438    /* always have a view here but texture is NULL if no sampler view was set. */
3439    if (!sp_samp->sp_sview[sview_index].base.texture) {
3440       int i, j;
3441       for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3442          for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3443             rgba[j][i] = 0.0f;
3444          }
3445       }
3446       return;
3447    }
3448    sp_get_texels(&sp_samp->sp_sview[sview_index], i, j, k, lod, offset, rgba);
3449 }
3450
3451
3452 struct sp_tgsi_sampler *
3453 sp_create_tgsi_sampler(void)
3454 {
3455    struct sp_tgsi_sampler *samp = CALLOC_STRUCT(sp_tgsi_sampler);
3456    if (!samp)
3457       return NULL;
3458
3459    samp->base.get_dims = sp_tgsi_get_dims;
3460    samp->base.get_samples = sp_tgsi_get_samples;
3461    samp->base.get_texel = sp_tgsi_get_texel;
3462
3463    return samp;
3464 }
3465