src/gallium/drivers/softpipe/sp_tex_sample.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  * Copyright 2008-2010 VMware, Inc.  All rights reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the
   9  * "Software"), to deal in the Software without restriction, including
  10  * without limitation the rights to use, copy, modify, merge, publish,
  11  * distribute, sub license, and/or sell copies of the Software, and to
  12  * permit persons to whom the Software is furnished to do so, subject to
  13  * the following conditions:
  14  *
  15  * The above copyright notice and this permission notice (including the
  16  * next paragraph) shall be included in all copies or substantial portions
  17  * of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26  *
  27  **************************************************************************/
  28
  29 /**
  30  * Texture sampling
  31  *
  32  * Authors:
  33  *   Brian Paul
  34  *   Keith Whitwell
  35  */
  36
  37 #include "pipe/p_context.h"
  38 #include "pipe/p_defines.h"
  39 #include "pipe/p_shader_tokens.h"
  40 #include "util/u_math.h"
  41 #include "util/u_format.h"
  42 #include "util/u_memory.h"
  43 #include "sp_quad.h"   /* only for #define QUAD_* tokens */
  44 #include "sp_tex_sample.h"
  45 #include "sp_tex_tile_cache.h"
  46
  47
  48 /** Set to one to help debug texture sampling */
  49 #define DEBUG_TEX 0
  50
  51
  52 /*
  53  * Return fractional part of 'f'.  Used for computing interpolation weights.
  54  * Need to be careful with negative values.
  55  * Note, if this function isn't perfect you'll sometimes see 1-pixel bands
  56  * of improperly weighted linear-filtered textures.
  57  * The tests/texwrap.c demo is a good test.
  58  */
  59 static INLINE float
  60 frac(float f)
  61 {
  62    return f - floorf(f);
  63 }
  64
  65
  66
  67 /**
  68  * Linear interpolation macro
  69  */
  70 static INLINE float
  71 lerp(float a, float v0, float v1)
  72 {
  73    return v0 + a * (v1 - v0);
  74 }
  75
  76
  77 /**
  78  * Do 2D/bilinear interpolation of float values.
  79  * v00, v10, v01 and v11 are typically four texture samples in a square/box.
  80  * a and b are the horizontal and vertical interpolants.
  81  * It's important that this function is inlined when compiled with
  82  * optimization!  If we find that's not true on some systems, convert
  83  * to a macro.
  84  */
  85 static INLINE float
  86 lerp_2d(float a, float b,
  87         float v00, float v10, float v01, float v11)
  88 {
  89    const float temp0 = lerp(a, v00, v10);
  90    const float temp1 = lerp(a, v01, v11);
  91    return lerp(b, temp0, temp1);
  92 }
  93
  94
  95 /**
  96  * As above, but 3D interpolation of 8 values.
  97  */
  98 static INLINE float
  99 lerp_3d(float a, float b, float c,
 100         float v000, float v100, float v010, float v110,
 101         float v001, float v101, float v011, float v111)
 102 {
 103    const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
 104    const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
 105    return lerp(c, temp0, temp1);
 106 }
 107
 108
 109
 110 /**
 111  * Compute coord % size for repeat wrap modes.
 112  * Note that if coord is negative, coord % size doesn't give the right
 113  * value.  To avoid that problem we add a large multiple of the size
 114  * (rather than using a conditional).
 115  */
 116 static INLINE int
 117 repeat(int coord, unsigned size)
 118 {
 119    return (coord + size * 1024) % size;
 120 }
 121
 122
 123 /**
 124  * Apply texture coord wrapping mode and return integer texture indexes
 125  * for a vector of four texcoords (S or T or P).
 126  * \param wrapMode  PIPE_TEX_WRAP_x
 127  * \param s  the incoming texcoords
 128  * \param size  the texture image size
 129  * \param icoord  returns the integer texcoords
 130  * \return  integer texture index
 131  */
 132 static void
 133 wrap_nearest_repeat(float s, unsigned size, int *icoord)
 134 {
 135    /* s limited to [0,1) */
 136    /* i limited to [0,size-1] */
 137    int i = util_ifloor(s * size);
 138    *icoord = repeat(i, size);
 139 }
 140
 141
 142 static void
 143 wrap_nearest_clamp(float s, unsigned size, int *icoord)
 144 {
 145    /* s limited to [0,1] */
 146    /* i limited to [0,size-1] */
 147    if (s <= 0.0F)
 148       *icoord = 0;
 149    else if (s >= 1.0F)
 150       *icoord = size - 1;
 151    else
 152       *icoord = util_ifloor(s * size);
 153 }
 154
 155
 156 static void
 157 wrap_nearest_clamp_to_edge(float s, unsigned size, int *icoord)
 158 {
 159    /* s limited to [min,max] */
 160    /* i limited to [0, size-1] */
 161    const float min = 1.0F / (2.0F * size);
 162    const float max = 1.0F - min;
 163    if (s < min)
 164       *icoord = 0;
 165    else if (s > max)
 166       *icoord = size - 1;
 167    else
 168       *icoord = util_ifloor(s * size);
 169 }
 170
 171
 172 static void
 173 wrap_nearest_clamp_to_border(float s, unsigned size, int *icoord)
 174 {
 175    /* s limited to [min,max] */
 176    /* i limited to [-1, size] */
 177    const float min = -1.0F / (2.0F * size);
 178    const float max = 1.0F - min;
 179    if (s <= min)
 180       *icoord = -1;
 181    else if (s >= max)
 182       *icoord = size;
 183    else
 184       *icoord = util_ifloor(s * size);
 185 }
 186
 187
 188 static void
 189 wrap_nearest_mirror_repeat(float s, unsigned size, int *icoord)
 190 {
 191    const float min = 1.0F / (2.0F * size);
 192    const float max = 1.0F - min;
 193    const int flr = util_ifloor(s);
 194    float u = frac(s);
 195    if (flr & 1)
 196       u = 1.0F - u;
 197    if (u < min)
 198       *icoord = 0;
 199    else if (u > max)
 200       *icoord = size - 1;
 201    else
 202       *icoord = util_ifloor(u * size);
 203 }
 204
 205
 206 static void
 207 wrap_nearest_mirror_clamp(float s, unsigned size, int *icoord)
 208 {
 209    /* s limited to [0,1] */
 210    /* i limited to [0,size-1] */
 211    const float u = fabsf(s);
 212    if (u <= 0.0F)
 213       *icoord = 0;
 214    else if (u >= 1.0F)
 215       *icoord = size - 1;
 216    else
 217       *icoord = util_ifloor(u * size);
 218 }
 219
 220
 221 static void
 222 wrap_nearest_mirror_clamp_to_edge(float s, unsigned size, int *icoord)
 223 {
 224    /* s limited to [min,max] */
 225    /* i limited to [0, size-1] */
 226    const float min = 1.0F / (2.0F * size);
 227    const float max = 1.0F - min;
 228    const float u = fabsf(s);
 229    if (u < min)
 230       *icoord = 0;
 231    else if (u > max)
 232       *icoord = size - 1;
 233    else
 234       *icoord = util_ifloor(u * size);
 235 }
 236
 237
 238 static void
 239 wrap_nearest_mirror_clamp_to_border(float s, unsigned size, int *icoord)
 240 {
 241    /* s limited to [min,max] */
 242    /* i limited to [0, size-1] */
 243    const float min = -1.0F / (2.0F * size);
 244    const float max = 1.0F - min;
 245    const float u = fabsf(s);
 246    if (u < min)
 247       *icoord = -1;
 248    else if (u > max)
 249       *icoord = size;
 250    else
 251       *icoord = util_ifloor(u * size);
 252 }
 253
 254
 255 /**
 256  * Used to compute texel locations for linear sampling
 257  * \param wrapMode  PIPE_TEX_WRAP_x
 258  * \param s  the texcoord
 259  * \param size  the texture image size
 260  * \param icoord0  returns first texture index
 261  * \param icoord1  returns second texture index (usually icoord0 + 1)
 262  * \param w  returns blend factor/weight between texture indices
 263  * \param icoord  returns the computed integer texture coord
 264  */
 265 static void
 266 wrap_linear_repeat(float s, unsigned size,
 267                    int *icoord0, int *icoord1, float *w)
 268 {
 269    float u = s * size - 0.5F;
 270    *icoord0 = repeat(util_ifloor(u), size);
 271    *icoord1 = repeat(*icoord0 + 1, size);
 272    *w = frac(u);
 273 }
 274
 275
 276 static void
 277 wrap_linear_clamp(float s, unsigned size,
 278                   int *icoord0, int *icoord1, float *w)
 279 {
 280    float u = CLAMP(s, 0.0F, 1.0F);
 281    u = u * size - 0.5f;
 282    *icoord0 = util_ifloor(u);
 283    *icoord1 = *icoord0 + 1;
 284    *w = frac(u);
 285 }
 286
 287
 288 static void
 289 wrap_linear_clamp_to_edge(float s, unsigned size,
 290                           int *icoord0, int *icoord1, float *w)
 291 {
 292    float u = CLAMP(s, 0.0F, 1.0F);
 293    u = u * size - 0.5f;
 294    *icoord0 = util_ifloor(u);
 295    *icoord1 = *icoord0 + 1;
 296    if (*icoord0 < 0)
 297       *icoord0 = 0;
 298    if (*icoord1 >= (int) size)
 299       *icoord1 = size - 1;
 300    *w = frac(u);
 301 }
 302
 303
 304 static void
 305 wrap_linear_clamp_to_border(float s, unsigned size,
 306                             int *icoord0, int *icoord1, float *w)
 307 {
 308    const float min = -1.0F / (2.0F * size);
 309    const float max = 1.0F - min;
 310    float u = CLAMP(s, min, max);
 311    u = u * size - 0.5f;
 312    *icoord0 = util_ifloor(u);
 313    *icoord1 = *icoord0 + 1;
 314    *w = frac(u);
 315 }
 316
 317
 318 static void
 319 wrap_linear_mirror_repeat(float s, unsigned size,
 320                           int *icoord0, int *icoord1, float *w)
 321 {
 322    const int flr = util_ifloor(s);
 323    float u = frac(s);
 324    if (flr & 1)
 325       u = 1.0F - u;
 326    u = u * size - 0.5F;
 327    *icoord0 = util_ifloor(u);
 328    *icoord1 = *icoord0 + 1;
 329    if (*icoord0 < 0)
 330       *icoord0 = 0;
 331    if (*icoord1 >= (int) size)
 332       *icoord1 = size - 1;
 333    *w = frac(u);
 334 }
 335
 336
 337 static void
 338 wrap_linear_mirror_clamp(float s, unsigned size,
 339                          int *icoord0, int *icoord1, float *w)
 340 {
 341    float u = fabsf(s);
 342    if (u >= 1.0F)
 343       u = (float) size;
 344    else
 345       u *= size;
 346    u -= 0.5F;
 347    *icoord0 = util_ifloor(u);
 348    *icoord1 = *icoord0 + 1;
 349    *w = frac(u);
 350 }
 351
 352
 353 static void
 354 wrap_linear_mirror_clamp_to_edge(float s, unsigned size,
 355                                  int *icoord0, int *icoord1, float *w)
 356 {
 357    float u = fabsf(s);
 358    if (u >= 1.0F)
 359       u = (float) size;
 360    else
 361       u *= size;
 362    u -= 0.5F;
 363    *icoord0 = util_ifloor(u);
 364    *icoord1 = *icoord0 + 1;
 365    if (*icoord0 < 0)
 366       *icoord0 = 0;
 367    if (*icoord1 >= (int) size)
 368       *icoord1 = size - 1;
 369    *w = frac(u);
 370 }
 371
 372
 373 static void
 374 wrap_linear_mirror_clamp_to_border(float s, unsigned size,
 375                                    int *icoord0, int *icoord1, float *w)
 376 {
 377    const float min = -1.0F / (2.0F * size);
 378    const float max = 1.0F - min;
 379    float u = fabsf(s);
 380    if (u <= min)
 381       u = min * size;
 382    else if (u >= max)
 383       u = max * size;
 384    else
 385       u *= size;
 386    u -= 0.5F;
 387    *icoord0 = util_ifloor(u);
 388    *icoord1 = *icoord0 + 1;
 389    *w = frac(u);
 390 }
 391
 392
 393 /**
 394  * PIPE_TEX_WRAP_CLAMP for nearest sampling, unnormalized coords.
 395  */
 396 static void
 397 wrap_nearest_unorm_clamp(float s, unsigned size, int *icoord)
 398 {
 399    int i = util_ifloor(s);
 400    *icoord = CLAMP(i, 0, (int) size-1);
 401 }
 402
 403
 404 /**
 405  * PIPE_TEX_WRAP_CLAMP_TO_BORDER for nearest sampling, unnormalized coords.
 406  */
 407 static void
 408 wrap_nearest_unorm_clamp_to_border(float s, unsigned size, int *icoord)
 409 {
 410    *icoord = util_ifloor( CLAMP(s, -0.5F, (float) size + 0.5F) );
 411 }
 412
 413
 414 /**
 415  * PIPE_TEX_WRAP_CLAMP_TO_EDGE for nearest sampling, unnormalized coords.
 416  */
 417 static void
 418 wrap_nearest_unorm_clamp_to_edge(float s, unsigned size, int *icoord)
 419 {
 420    *icoord = util_ifloor( CLAMP(s, 0.5F, (float) size - 0.5F) );
 421 }
 422
 423
 424 /**
 425  * PIPE_TEX_WRAP_CLAMP for linear sampling, unnormalized coords.
 426  */
 427 static void
 428 wrap_linear_unorm_clamp(float s, unsigned size,
 429                         int *icoord0, int *icoord1, float *w)
 430 {
 431    /* Not exactly what the spec says, but it matches NVIDIA output */
 432    float u = CLAMP(s - 0.5F, 0.0f, (float) size - 1.0f);
 433    *icoord0 = util_ifloor(u);
 434    *icoord1 = *icoord0 + 1;
 435    *w = frac(u);
 436 }
 437
 438
 439 /**
 440  * PIPE_TEX_WRAP_CLAMP_TO_BORDER for linear sampling, unnormalized coords.
 441  */
 442 static void
 443 wrap_linear_unorm_clamp_to_border(float s, unsigned size,
 444                                   int *icoord0, int *icoord1, float *w)
 445 {
 446    float u = CLAMP(s, -0.5F, (float) size + 0.5F);
 447    u -= 0.5F;
 448    *icoord0 = util_ifloor(u);
 449    *icoord1 = *icoord0 + 1;
 450    if (*icoord1 > (int) size - 1)
 451       *icoord1 = size - 1;
 452    *w = frac(u);
 453 }
 454
 455
 456 /**
 457  * PIPE_TEX_WRAP_CLAMP_TO_EDGE for linear sampling, unnormalized coords.
 458  */
 459 static void
 460 wrap_linear_unorm_clamp_to_edge(float s, unsigned size,
 461                                 int *icoord0, int *icoord1, float *w)
 462 {
 463    float u = CLAMP(s, +0.5F, (float) size - 0.5F);
 464    u -= 0.5F;
 465    *icoord0 = util_ifloor(u);
 466    *icoord1 = *icoord0 + 1;
 467    if (*icoord1 > (int) size - 1)
 468       *icoord1 = size - 1;
 469    *w = frac(u);
 470 }
 471
 472
 473 /**
 474  * Do coordinate to array index conversion.  For array textures.
 475  */
 476 static INLINE void
 477 wrap_array_layer(float coord, unsigned size, int *layer)
 478 {
 479    int c = util_ifloor(coord + 0.5F);
 480    *layer = CLAMP(c, 0, size - 1);
 481 }
 482
 483
 484 /**
 485  * Examine the quad's texture coordinates to compute the partial
 486  * derivatives w.r.t X and Y, then compute lambda (level of detail).
 487  */
 488 static float
 489 compute_lambda_1d(const struct sp_sampler_variant *samp,
 490                   const float s[TGSI_QUAD_SIZE],
 491                   const float t[TGSI_QUAD_SIZE],
 492                   const float p[TGSI_QUAD_SIZE])
 493 {
 494    const struct pipe_resource *texture = samp->view->texture;
 495    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 496    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 497    float rho = MAX2(dsdx, dsdy) * u_minify(texture->width0, samp->view->u.tex.first_level);
 498
 499    return util_fast_log2(rho);
 500 }
 501
 502
 503 static float
 504 compute_lambda_2d(const struct sp_sampler_variant *samp,
 505                   const float s[TGSI_QUAD_SIZE],
 506                   const float t[TGSI_QUAD_SIZE],
 507                   const float p[TGSI_QUAD_SIZE])
 508 {
 509    const struct pipe_resource *texture = samp->view->texture;
 510    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 511    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 512    float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
 513    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
 514    float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, samp->view->u.tex.first_level);
 515    float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, samp->view->u.tex.first_level);
 516    float rho  = MAX2(maxx, maxy);
 517
 518    return util_fast_log2(rho);
 519 }
 520
 521
 522 static float
 523 compute_lambda_3d(const struct sp_sampler_variant *samp,
 524                   const float s[TGSI_QUAD_SIZE],
 525                   const float t[TGSI_QUAD_SIZE],
 526                   const float p[TGSI_QUAD_SIZE])
 527 {
 528    const struct pipe_resource *texture = samp->view->texture;
 529    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 530    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 531    float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
 532    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
 533    float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
 534    float dpdy = fabsf(p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT]);
 535    float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, samp->view->u.tex.first_level);
 536    float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, samp->view->u.tex.first_level);
 537    float maxz = MAX2(dpdx, dpdy) * u_minify(texture->depth0, samp->view->u.tex.first_level);
 538    float rho;
 539
 540    rho = MAX2(maxx, maxy);
 541    rho = MAX2(rho, maxz);
 542
 543    return util_fast_log2(rho);
 544 }
 545
 546
 547 /**
 548  * Compute lambda for a vertex texture sampler.
 549  * Since there aren't derivatives to use, just return 0.
 550  */
 551 static float
 552 compute_lambda_vert(const struct sp_sampler_variant *samp,
 553                     const float s[TGSI_QUAD_SIZE],
 554                     const float t[TGSI_QUAD_SIZE],
 555                     const float p[TGSI_QUAD_SIZE])
 556 {
 557    return 0.0f;
 558 }
 559
 560
 561
 562 /**
 563  * Get a texel from a texture, using the texture tile cache.
 564  *
 565  * \param addr  the template tex address containing cube, z, face info.
 566  * \param x  the x coord of texel within 2D image
 567  * \param y  the y coord of texel within 2D image
 568  * \param rgba  the quad to put the texel/color into
 569  *
 570  * XXX maybe move this into sp_tex_tile_cache.c and merge with the
 571  * sp_get_cached_tile_tex() function.
 572  */
 573
 574
 575
 576
 577 static INLINE const float *
 578 get_texel_2d_no_border(const struct sp_sampler_variant *samp,
 579                        union tex_tile_address addr, int x, int y)
 580 {
 581    const struct softpipe_tex_cached_tile *tile;
 582
 583    addr.bits.x = x / TILE_SIZE;
 584    addr.bits.y = y / TILE_SIZE;
 585    y %= TILE_SIZE;
 586    x %= TILE_SIZE;
 587
 588    tile = sp_get_cached_tile_tex(samp->cache, addr);
 589
 590    return &tile->data.color[y][x][0];
 591 }
 592
 593
 594 static INLINE const float *
 595 get_texel_2d(const struct sp_sampler_variant *samp,
 596              union tex_tile_address addr, int x, int y)
 597 {
 598    const struct pipe_resource *texture = samp->view->texture;
 599    unsigned level = addr.bits.level;
 600
 601    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 602        y < 0 || y >= (int) u_minify(texture->height0, level)) {
 603       return samp->sampler->border_color.f;
 604    }
 605    else {
 606       return get_texel_2d_no_border( samp, addr, x, y );
 607    }
 608 }
 609
 610 /*
 611  * seamless cubemap neighbour array.
 612  * this array is used to find the adjacent face in each of 4 directions,
 613  * left, right, up, down. (or -x, +x, -y, +y).
 614  */
 615 static const unsigned face_array[PIPE_TEX_FACE_MAX][4] = {
 616    /* pos X first then neg X is Z different, Y the same */
 617    /* PIPE_TEX_FACE_POS_X,*/
 618    { PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z,
 619      PIPE_TEX_FACE_NEG_Y, PIPE_TEX_FACE_POS_Y },
 620    /* PIPE_TEX_FACE_NEG_X */
 621    { PIPE_TEX_FACE_NEG_Z, PIPE_TEX_FACE_POS_Z,
 622      PIPE_TEX_FACE_NEG_Y, PIPE_TEX_FACE_POS_Y },
 623
 624    /* pos Y first then neg Y is X different, X the same */
 625    /* PIPE_TEX_FACE_POS_Y */
 626    { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
 627      PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z },
 628
 629    /* PIPE_TEX_FACE_NEG_Y */
 630    { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
 631      PIPE_TEX_FACE_NEG_Z, PIPE_TEX_FACE_POS_Z },
 632
 633    /* pos Z first then neg Y is X different, X the same */
 634    /* PIPE_TEX_FACE_POS_Z */
 635    { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
 636      PIPE_TEX_FACE_NEG_Y, PIPE_TEX_FACE_POS_Y },
 637
 638    /* PIPE_TEX_FACE_NEG_Z */
 639    { PIPE_TEX_FACE_POS_X, PIPE_TEX_FACE_NEG_X,
 640      PIPE_TEX_FACE_NEG_Y, PIPE_TEX_FACE_POS_Y }
 641 };
 642
 643 static INLINE unsigned
 644 get_next_face(unsigned face, int x, int y)
 645 {
 646    int idx = 0;
 647
 648    if (x == 0 && y == 0)
 649       return face;
 650    if (x == -1)
 651       idx = 0;
 652    else if (x == 1)
 653       idx = 1;
 654    else if (y == -1)
 655       idx = 2;
 656    else if (y == 1)
 657       idx = 3;
 658
 659    return face_array[face][idx];
 660 }
 661
 662 static INLINE const float *
 663 get_texel_cube_seamless(const struct sp_sampler_variant *samp,
 664                         union tex_tile_address addr, int x, int y,
 665                         float *corner)
 666 {
 667    const struct pipe_resource *texture = samp->view->texture;
 668    unsigned level = addr.bits.level;
 669    unsigned face = addr.bits.face;
 670    int new_x, new_y;
 671    int max_x, max_y;
 672    int c;
 673
 674    max_x = (int) u_minify(texture->width0, level);
 675    max_y = (int) u_minify(texture->height0, level);
 676    new_x = x;
 677    new_y = y;
 678
 679    /* the corner case */
 680    if ((x < 0 || x >= max_x) &&
 681        (y < 0 || y >= max_y)) {
 682       const float *c1, *c2, *c3;
 683       int fx = x < 0 ? 0 : max_x - 1;
 684       int fy = y < 0 ? 0 : max_y - 1;
 685       c1 = get_texel_2d_no_border( samp, addr, fx, fy);
 686       addr.bits.face = get_next_face(face, (x < 0) ? -1 : 1, 0);
 687       c2 = get_texel_2d_no_border( samp, addr, (x < 0) ? max_x - 1 : 0, fy);
 688       addr.bits.face = get_next_face(face, 0, (y < 0) ? -1 : 1);
 689       c3 = get_texel_2d_no_border( samp, addr, fx, (y < 0) ?  max_y - 1 : 0);
 690       for (c = 0; c < TGSI_QUAD_SIZE; c++)
 691          corner[c] = CLAMP((c1[c] + c2[c] + c3[c]), 0.0F, 1.0F) / 3;
 692
 693       return corner;
 694    }
 695    /* change the face */
 696    if (x < 0) {
 697       new_x = max_x - 1;
 698       face = get_next_face(face, -1, 0);
 699    } else if (x >= max_x) {
 700       new_x = 0;
 701       face = get_next_face(face, 1, 0);
 702    } else if (y < 0) {
 703       new_y = max_y - 1;
 704       face = get_next_face(face, 0, -1);
 705    } else if (y >= max_y) {
 706       new_y = 0;
 707       face = get_next_face(face, 0, 1);
 708    }
 709
 710    addr.bits.face = face;
 711    return get_texel_2d_no_border( samp, addr, new_x, new_y );
 712 }
 713
 714 /* Gather a quad of adjacent texels within a tile:
 715  */
 716 static INLINE void
 717 get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_variant *samp,
 718                                         union tex_tile_address addr,
 719                                         unsigned x, unsigned y,
 720                                         const float *out[4])
 721 {
 722    const struct softpipe_tex_cached_tile *tile;
 723
 724    addr.bits.x = x / TILE_SIZE;
 725    addr.bits.y = y / TILE_SIZE;
 726    y %= TILE_SIZE;
 727    x %= TILE_SIZE;
 728
 729    tile = sp_get_cached_tile_tex(samp->cache, addr);
 730
 731    out[0] = &tile->data.color[y  ][x  ][0];
 732    out[1] = &tile->data.color[y  ][x+1][0];
 733    out[2] = &tile->data.color[y+1][x  ][0];
 734    out[3] = &tile->data.color[y+1][x+1][0];
 735 }
 736
 737
 738 /* Gather a quad of potentially non-adjacent texels:
 739  */
 740 static INLINE void
 741 get_texel_quad_2d_no_border(const struct sp_sampler_variant *samp,
 742                             union tex_tile_address addr,
 743                             int x0, int y0,
 744                             int x1, int y1,
 745                             const float *out[4])
 746 {
 747    out[0] = get_texel_2d_no_border( samp, addr, x0, y0 );
 748    out[1] = get_texel_2d_no_border( samp, addr, x1, y0 );
 749    out[2] = get_texel_2d_no_border( samp, addr, x0, y1 );
 750    out[3] = get_texel_2d_no_border( samp, addr, x1, y1 );
 751 }
 752
 753 /* Can involve a lot of unnecessary checks for border color:
 754  */
 755 static INLINE void
 756 get_texel_quad_2d(const struct sp_sampler_variant *samp,
 757                   union tex_tile_address addr,
 758                   int x0, int y0,
 759                   int x1, int y1,
 760                   const float *out[4])
 761 {
 762    out[0] = get_texel_2d( samp, addr, x0, y0 );
 763    out[1] = get_texel_2d( samp, addr, x1, y0 );
 764    out[3] = get_texel_2d( samp, addr, x1, y1 );
 765    out[2] = get_texel_2d( samp, addr, x0, y1 );
 766 }
 767
 768
 769
 770 /* 3d variants:
 771  */
 772 static INLINE const float *
 773 get_texel_3d_no_border(const struct sp_sampler_variant *samp,
 774                        union tex_tile_address addr, int x, int y, int z)
 775 {
 776    const struct softpipe_tex_cached_tile *tile;
 777
 778    addr.bits.x = x / TILE_SIZE;
 779    addr.bits.y = y / TILE_SIZE;
 780    addr.bits.z = z;
 781    y %= TILE_SIZE;
 782    x %= TILE_SIZE;
 783
 784    tile = sp_get_cached_tile_tex(samp->cache, addr);
 785
 786    return &tile->data.color[y][x][0];
 787 }
 788
 789
 790 static INLINE const float *
 791 get_texel_3d(const struct sp_sampler_variant *samp,
 792              union tex_tile_address addr, int x, int y, int z)
 793 {
 794    const struct pipe_resource *texture = samp->view->texture;
 795    unsigned level = addr.bits.level;
 796
 797    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 798        y < 0 || y >= (int) u_minify(texture->height0, level) ||
 799        z < 0 || z >= (int) u_minify(texture->depth0, level)) {
 800       return samp->sampler->border_color.f;
 801    }
 802    else {
 803       return get_texel_3d_no_border( samp, addr, x, y, z );
 804    }
 805 }
 806
 807
 808 /* Get texel pointer for 1D array texture */
 809 static INLINE const float *
 810 get_texel_1d_array(const struct sp_sampler_variant *samp,
 811                    union tex_tile_address addr, int x, int y)
 812 {
 813    const struct pipe_resource *texture = samp->view->texture;
 814    unsigned level = addr.bits.level;
 815
 816    if (x < 0 || x >= (int) u_minify(texture->width0, level)) {
 817       return samp->sampler->border_color.f;
 818    }
 819    else {
 820       return get_texel_2d_no_border(samp, addr, x, y);
 821    }
 822 }
 823
 824
 825 /* Get texel pointer for 2D array texture */
 826 static INLINE const float *
 827 get_texel_2d_array(const struct sp_sampler_variant *samp,
 828                    union tex_tile_address addr, int x, int y, int layer)
 829 {
 830    const struct pipe_resource *texture = samp->view->texture;
 831    unsigned level = addr.bits.level;
 832
 833    assert(layer < (int) texture->array_size);
 834    assert(layer >= 0);
 835
 836    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 837        y < 0 || y >= (int) u_minify(texture->height0, level)) {
 838       return samp->sampler->border_color.f;
 839    }
 840    else {
 841       return get_texel_3d_no_border(samp, addr, x, y, layer);
 842    }
 843 }
 844
 845
 846 /* Get texel pointer for cube array texture */
 847 static INLINE const float *
 848 get_texel_cube_array(const struct sp_sampler_variant *samp,
 849                      union tex_tile_address addr, int x, int y, int layer)
 850 {
 851    const struct pipe_resource *texture = samp->view->texture;
 852    unsigned level = addr.bits.level;
 853
 854    assert(layer < (int) texture->array_size);
 855    assert(layer >= 0);
 856
 857    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 858        y < 0 || y >= (int) u_minify(texture->height0, level)) {
 859       return samp->sampler->border_color.f;
 860    }
 861    else {
 862       return get_texel_3d_no_border(samp, addr, x, y, layer);
 863    }
 864 }
 865 /**
 866  * Given the logbase2 of a mipmap's base level size and a mipmap level,
 867  * return the size (in texels) of that mipmap level.
 868  * For example, if level[0].width = 256 then base_pot will be 8.
 869  * If level = 2, then we'll return 64 (the width at level=2).
 870  * Return 1 if level > base_pot.
 871  */
 872 static INLINE unsigned
 873 pot_level_size(unsigned base_pot, unsigned level)
 874 {
 875    return (base_pot >= level) ? (1 << (base_pot - level)) : 1;
 876 }
 877
 878
 879 static void
 880 print_sample(const char *function, const float *rgba)
 881 {
 882    debug_printf("%s %g %g %g %g\n",
 883                 function,
 884                 rgba[0], rgba[TGSI_NUM_CHANNELS], rgba[2*TGSI_NUM_CHANNELS], rgba[3*TGSI_NUM_CHANNELS]);
 885 }
 886
 887
 888 static void
 889 print_sample_4(const char *function, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 890 {
 891    debug_printf("%s %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n",
 892                 function,
 893                 rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0],
 894                 rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1],
 895                 rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2],
 896                 rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]);
 897 }
 898
 899 /* Some image-filter fastpaths:
 900  */
 901 static INLINE void
 902 img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
 903                                 float s,
 904                                 float t,
 905                                 float p,
 906                                 unsigned level,
 907                                 unsigned face_id,
 908                                 float *rgba)
 909 {
 910    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
 911    unsigned xpot = pot_level_size(samp->xpot, level);
 912    unsigned ypot = pot_level_size(samp->ypot, level);
 913    unsigned xmax = (xpot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, xpot) - 1; */
 914    unsigned ymax = (ypot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, ypot) - 1; */
 915    union tex_tile_address addr;
 916    int c;
 917
 918    float u = s * xpot - 0.5F;
 919    float v = t * ypot - 0.5F;
 920
 921    int uflr = util_ifloor(u);
 922    int vflr = util_ifloor(v);
 923
 924    float xw = u - (float)uflr;
 925    float yw = v - (float)vflr;
 926
 927    int x0 = uflr & (xpot - 1);
 928    int y0 = vflr & (ypot - 1);
 929
 930    const float *tx[4];
 931
 932    addr.value = 0;
 933    addr.bits.level = level;
 934
 935    /* Can we fetch all four at once:
 936     */
 937    if (x0 < xmax && y0 < ymax) {
 938       get_texel_quad_2d_no_border_single_tile(samp, addr, x0, y0, tx);
 939    }
 940    else {
 941       unsigned x1 = (x0 + 1) & (xpot - 1);
 942       unsigned y1 = (y0 + 1) & (ypot - 1);
 943       get_texel_quad_2d_no_border(samp, addr, x0, y0, x1, y1, tx);
 944    }
 945
 946    /* interpolate R, G, B, A */
 947    for (c = 0; c < TGSI_QUAD_SIZE; c++) {
 948       rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
 949                                        tx[0][c], tx[1][c],
 950                                        tx[2][c], tx[3][c]);
 951    }
 952
 953    if (DEBUG_TEX) {
 954       print_sample(__FUNCTION__, rgba);
 955    }
 956 }
 957
 958
 959 static INLINE void
 960 img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
 961                                  float s,
 962                                  float t,
 963                                  float p,
 964                                  unsigned level,
 965                                  unsigned face_id,
 966                                  float rgba[TGSI_QUAD_SIZE])
 967 {
 968    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
 969    unsigned xpot = pot_level_size(samp->xpot, level);
 970    unsigned ypot = pot_level_size(samp->ypot, level);
 971    const float *out;
 972    union tex_tile_address addr;
 973    int c;
 974
 975    float u = s * xpot;
 976    float v = t * ypot;
 977
 978    int uflr = util_ifloor(u);
 979    int vflr = util_ifloor(v);
 980
 981    int x0 = uflr & (xpot - 1);
 982    int y0 = vflr & (ypot - 1);
 983
 984    addr.value = 0;
 985    addr.bits.level = level;
 986
 987    out = get_texel_2d_no_border(samp, addr, x0, y0);
 988    for (c = 0; c < TGSI_QUAD_SIZE; c++)
 989       rgba[TGSI_NUM_CHANNELS*c] = out[c];
 990
 991    if (DEBUG_TEX) {
 992       print_sample(__FUNCTION__, rgba);
 993    }
 994 }
 995
 996
 997 static INLINE void
 998 img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
 999                                 float s,
1000                                 float t,
1001                                 float p,
1002                                 unsigned level,
1003                                 unsigned face_id,
1004                                 float rgba[TGSI_QUAD_SIZE])
1005 {
1006    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1007    unsigned xpot = pot_level_size(samp->xpot, level);
1008    unsigned ypot = pot_level_size(samp->ypot, level);
1009    union tex_tile_address addr;
1010    int c;
1011
1012    float u = s * xpot;
1013    float v = t * ypot;
1014
1015    int x0, y0;
1016    const float *out;
1017
1018    addr.value = 0;
1019    addr.bits.level = level;
1020
1021    x0 = util_ifloor(u);
1022    if (x0 < 0)
1023       x0 = 0;
1024    else if (x0 > xpot - 1)
1025       x0 = xpot - 1;
1026
1027    y0 = util_ifloor(v);
1028    if (y0 < 0)
1029       y0 = 0;
1030    else if (y0 > ypot - 1)
1031       y0 = ypot - 1;
1032
1033    out = get_texel_2d_no_border(samp, addr, x0, y0);
1034    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1035       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1036
1037    if (DEBUG_TEX) {
1038       print_sample(__FUNCTION__, rgba);
1039    }
1040 }
1041
1042
1043 static void
1044 img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
1045                       float s,
1046                       float t,
1047                       float p,
1048                       unsigned level,
1049                       unsigned face_id,
1050                       float rgba[TGSI_QUAD_SIZE])
1051 {
1052    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1053    const struct pipe_resource *texture = samp->view->texture;
1054    int width;
1055    int x;
1056    union tex_tile_address addr;
1057    const float *out;
1058    int c;
1059
1060    width = u_minify(texture->width0, level);
1061
1062    assert(width > 0);
1063
1064    addr.value = 0;
1065    addr.bits.level = level;
1066
1067    samp->nearest_texcoord_s(s, width, &x);
1068
1069    out = get_texel_2d(samp, addr, x, 0);
1070    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1071       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1072
1073    if (DEBUG_TEX) {
1074       print_sample(__FUNCTION__, rgba);
1075    }
1076 }
1077
1078
1079 static void
1080 img_filter_1d_array_nearest(struct tgsi_sampler *tgsi_sampler,
1081                             float s,
1082                             float t,
1083                             float p,
1084                             unsigned level,
1085                             unsigned face_id,
1086                             float *rgba)
1087 {
1088    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1089    const struct pipe_resource *texture = samp->view->texture;
1090    int width;
1091    int x, layer;
1092    union tex_tile_address addr;
1093    const float *out;
1094    int c;
1095
1096    width = u_minify(texture->width0, level);
1097
1098    assert(width > 0);
1099
1100    addr.value = 0;
1101    addr.bits.level = level;
1102
1103    samp->nearest_texcoord_s(s, width, &x);
1104    wrap_array_layer(t, texture->array_size, &layer);
1105
1106    out = get_texel_1d_array(samp, addr, x, layer);
1107    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1108       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1109
1110    if (DEBUG_TEX) {
1111       print_sample(__FUNCTION__, rgba);
1112    }
1113 }
1114
1115
1116 static void
1117 img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
1118                       float s,
1119                       float t,
1120                       float p,
1121                       unsigned level,
1122                       unsigned face_id,
1123                       float *rgba)
1124 {
1125    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1126    const struct pipe_resource *texture = samp->view->texture;
1127    int width, height;
1128    int x, y;
1129    union tex_tile_address addr;
1130    const float *out;
1131    int c;
1132
1133    width = u_minify(texture->width0, level);
1134    height = u_minify(texture->height0, level);
1135
1136    assert(width > 0);
1137    assert(height > 0);
1138
1139    addr.value = 0;
1140    addr.bits.level = level;
1141
1142    samp->nearest_texcoord_s(s, width, &x);
1143    samp->nearest_texcoord_t(t, height, &y);
1144
1145    out = get_texel_2d(samp, addr, x, y);
1146    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1147       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1148
1149    if (DEBUG_TEX) {
1150       print_sample(__FUNCTION__, rgba);
1151    }
1152 }
1153
1154
1155 static void
1156 img_filter_2d_array_nearest(struct tgsi_sampler *tgsi_sampler,
1157                             float s,
1158                             float t,
1159                             float p,
1160                             unsigned level,
1161                             unsigned face_id,
1162                             float *rgba)
1163 {
1164    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1165    const struct pipe_resource *texture = samp->view->texture;
1166    int width, height;
1167    int x, y, layer;
1168    union tex_tile_address addr;
1169    const float *out;
1170    int c;
1171
1172    width = u_minify(texture->width0, level);
1173    height = u_minify(texture->height0, level);
1174
1175    assert(width > 0);
1176    assert(height > 0);
1177
1178    addr.value = 0;
1179    addr.bits.level = level;
1180
1181    samp->nearest_texcoord_s(s, width, &x);
1182    samp->nearest_texcoord_t(t, height, &y);
1183    wrap_array_layer(p, texture->array_size, &layer);
1184
1185    out = get_texel_2d_array(samp, addr, x, y, layer);
1186    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1187       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1188
1189    if (DEBUG_TEX) {
1190       print_sample(__FUNCTION__, rgba);
1191    }
1192 }
1193
1194
1195 static INLINE union tex_tile_address
1196 face(union tex_tile_address addr, unsigned face )
1197 {
1198    addr.bits.face = face;
1199    return addr;
1200 }
1201
1202
1203 static void
1204 img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler,
1205                         float s,
1206                         float t,
1207                         float p,
1208                         unsigned level,
1209                         unsigned face_id,
1210                         float *rgba)
1211 {
1212    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1213    const struct pipe_resource *texture = samp->view->texture;
1214    int width, height;
1215    int x, y;
1216    union tex_tile_address addr;
1217    const float *out;
1218    int c;
1219
1220    width = u_minify(texture->width0, level);
1221    height = u_minify(texture->height0, level);
1222
1223    assert(width > 0);
1224    assert(height > 0);
1225
1226    addr.value = 0;
1227    addr.bits.level = level;
1228
1229    /*
1230     * If NEAREST filtering is done within a miplevel, always apply wrap
1231     * mode CLAMP_TO_EDGE.
1232     */
1233    if (samp->sampler->seamless_cube_map) {
1234       wrap_nearest_clamp_to_edge(s, width, &x);
1235       wrap_nearest_clamp_to_edge(t, height, &y);
1236    } else {
1237       samp->nearest_texcoord_s(s, width, &x);
1238       samp->nearest_texcoord_t(t, height, &y);
1239    }
1240
1241    out = get_texel_2d(samp, face(addr, face_id), x, y);
1242    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1243       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1244
1245    if (DEBUG_TEX) {
1246       print_sample(__FUNCTION__, rgba);
1247    }
1248 }
1249
1250 static void
1251 img_filter_cube_array_nearest(struct tgsi_sampler *tgsi_sampler,
1252                         float s,
1253                         float t,
1254                         float p,
1255                         unsigned level,
1256                         unsigned face_id,
1257                         float *rgba)
1258 {
1259    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1260    const struct pipe_resource *texture = samp->view->texture;
1261    int width, height;
1262    int x, y, layer;
1263    union tex_tile_address addr;
1264    const float *out;
1265    int c;
1266
1267    width = u_minify(texture->width0, level);
1268    height = u_minify(texture->height0, level);
1269
1270    assert(width > 0);
1271    assert(height > 0);
1272
1273    addr.value = 0;
1274    addr.bits.level = level;
1275
1276    samp->nearest_texcoord_s(s, width, &x);
1277    samp->nearest_texcoord_t(t, height, &y);
1278    wrap_array_layer(p, texture->array_size, &layer);
1279
1280    out = get_texel_cube_array(samp, addr, x, y, layer * 6 + face_id);
1281    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1282       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1283
1284    if (DEBUG_TEX) {
1285       print_sample(__FUNCTION__, rgba);
1286    }
1287 }
1288
1289 static void
1290 img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler,
1291                       float s,
1292                       float t,
1293                       float p,
1294                       unsigned level,
1295                       unsigned face_id,
1296                       float *rgba)
1297 {
1298    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1299    const struct pipe_resource *texture = samp->view->texture;
1300    int width, height, depth;
1301    int x, y, z;
1302    union tex_tile_address addr;
1303    const float *out;
1304    int c;
1305
1306    width = u_minify(texture->width0, level);
1307    height = u_minify(texture->height0, level);
1308    depth = u_minify(texture->depth0, level);
1309
1310    assert(width > 0);
1311    assert(height > 0);
1312    assert(depth > 0);
1313
1314    samp->nearest_texcoord_s(s, width,  &x);
1315    samp->nearest_texcoord_t(t, height, &y);
1316    samp->nearest_texcoord_p(p, depth,  &z);
1317
1318    addr.value = 0;
1319    addr.bits.level = level;
1320
1321    out = get_texel_3d(samp, addr, x, y, z);
1322    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1323       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1324 }
1325
1326
1327 static void
1328 img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
1329                      float s,
1330                      float t,
1331                      float p,
1332                      unsigned level,
1333                      unsigned face_id,
1334                      float *rgba)
1335 {
1336    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1337    const struct pipe_resource *texture = samp->view->texture;
1338    int width;
1339    int x0, x1;
1340    float xw; /* weights */
1341    union tex_tile_address addr;
1342    const float *tx0, *tx1;
1343    int c;
1344
1345    width = u_minify(texture->width0, level);
1346
1347    assert(width > 0);
1348
1349    addr.value = 0;
1350    addr.bits.level = level;
1351
1352    samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
1353
1354    tx0 = get_texel_2d(samp, addr, x0, 0);
1355    tx1 = get_texel_2d(samp, addr, x1, 0);
1356
1357    /* interpolate R, G, B, A */
1358    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1359       rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
1360 }
1361
1362
1363 static void
1364 img_filter_1d_array_linear(struct tgsi_sampler *tgsi_sampler,
1365                            float s,
1366                            float t,
1367                            float p,
1368                            unsigned level,
1369                            unsigned face_id,
1370                            float *rgba)
1371 {
1372    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1373    const struct pipe_resource *texture = samp->view->texture;
1374    int width;
1375    int x0, x1, layer;
1376    float xw; /* weights */
1377    union tex_tile_address addr;
1378    const float *tx0, *tx1;
1379    int c;
1380
1381    width = u_minify(texture->width0, level);
1382
1383    assert(width > 0);
1384
1385    addr.value = 0;
1386    addr.bits.level = level;
1387
1388    samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
1389    wrap_array_layer(t, texture->array_size, &layer);
1390
1391    tx0 = get_texel_1d_array(samp, addr, x0, layer);
1392    tx1 = get_texel_1d_array(samp, addr, x1, layer);
1393
1394    /* interpolate R, G, B, A */
1395    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1396       rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
1397 }
1398
1399
1400 static void
1401 img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
1402                      float s,
1403                      float t,
1404                      float p,
1405                      unsigned level,
1406                      unsigned face_id,
1407                      float *rgba)
1408 {
1409    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1410    const struct pipe_resource *texture = samp->view->texture;
1411    int width, height;
1412    int x0, y0, x1, y1;
1413    float xw, yw; /* weights */
1414    union tex_tile_address addr;
1415    const float *tx0, *tx1, *tx2, *tx3;
1416    int c;
1417
1418    width = u_minify(texture->width0, level);
1419    height = u_minify(texture->height0, level);
1420
1421    assert(width > 0);
1422    assert(height > 0);
1423
1424    addr.value = 0;
1425    addr.bits.level = level;
1426
1427    samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
1428    samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1429
1430    tx0 = get_texel_2d(samp, addr, x0, y0);
1431    tx1 = get_texel_2d(samp, addr, x1, y0);
1432    tx2 = get_texel_2d(samp, addr, x0, y1);
1433    tx3 = get_texel_2d(samp, addr, x1, y1);
1434
1435    /* interpolate R, G, B, A */
1436    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1437       rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1438                                           tx0[c], tx1[c],
1439                                           tx2[c], tx3[c]);
1440 }
1441
1442
1443 static void
1444 img_filter_2d_array_linear(struct tgsi_sampler *tgsi_sampler,
1445                            float s,
1446                            float t,
1447                            float p,
1448                            unsigned level,
1449                            unsigned face_id,
1450                            float *rgba)
1451 {
1452    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1453    const struct pipe_resource *texture = samp->view->texture;
1454    int width, height;
1455    int x0, y0, x1, y1, layer;
1456    float xw, yw; /* weights */
1457    union tex_tile_address addr;
1458    const float *tx0, *tx1, *tx2, *tx3;
1459    int c;
1460
1461    width = u_minify(texture->width0, level);
1462    height = u_minify(texture->height0, level);
1463
1464    assert(width > 0);
1465    assert(height > 0);
1466
1467    addr.value = 0;
1468    addr.bits.level = level;
1469
1470    samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
1471    samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1472    wrap_array_layer(p, texture->array_size, &layer);
1473
1474    tx0 = get_texel_2d_array(samp, addr, x0, y0, layer);
1475    tx1 = get_texel_2d_array(samp, addr, x1, y0, layer);
1476    tx2 = get_texel_2d_array(samp, addr, x0, y1, layer);
1477    tx3 = get_texel_2d_array(samp, addr, x1, y1, layer);
1478
1479    /* interpolate R, G, B, A */
1480    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1481       rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1482                                           tx0[c], tx1[c],
1483                                           tx2[c], tx3[c]);
1484 }
1485
1486
1487 static void
1488 img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
1489                        float s,
1490                        float t,
1491                        float p,
1492                        unsigned level,
1493                        unsigned face_id,
1494                        float *rgba)
1495 {
1496    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1497    const struct pipe_resource *texture = samp->view->texture;
1498    int width, height;
1499    int x0, y0, x1, y1;
1500    float xw, yw; /* weights */
1501    union tex_tile_address addr, addrj;
1502    const float *tx0, *tx1, *tx2, *tx3;
1503    float corner0[TGSI_QUAD_SIZE], corner1[TGSI_QUAD_SIZE], corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
1504    int c;
1505
1506    width = u_minify(texture->width0, level);
1507    height = u_minify(texture->height0, level);
1508
1509    assert(width > 0);
1510    assert(height > 0);
1511
1512    addr.value = 0;
1513    addr.bits.level = level;
1514
1515    /*
1516     * For seamless if LINEAR filtering is done within a miplevel,
1517     * always apply wrap mode CLAMP_TO_BORDER.
1518     */
1519    if (samp->sampler->seamless_cube_map) {
1520       wrap_linear_clamp_to_border(s, width, &x0, &x1, &xw);
1521       wrap_linear_clamp_to_border(t, height, &y0, &y1, &yw);
1522    } else {
1523       samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
1524       samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1525    }
1526
1527    addrj = face(addr, face_id);
1528
1529    if (samp->sampler->seamless_cube_map) {
1530       tx0 = get_texel_cube_seamless(samp, addrj, x0, y0, corner0);
1531       tx1 = get_texel_cube_seamless(samp, addrj, x1, y0, corner1);
1532       tx2 = get_texel_cube_seamless(samp, addrj, x0, y1, corner2);
1533       tx3 = get_texel_cube_seamless(samp, addrj, x1, y1, corner3);
1534    } else {
1535       tx0 = get_texel_2d(samp, addrj, x0, y0);
1536       tx1 = get_texel_2d(samp, addrj, x1, y0);
1537       tx2 = get_texel_2d(samp, addrj, x0, y1);
1538       tx3 = get_texel_2d(samp, addrj, x1, y1);
1539    }
1540    /* interpolate R, G, B, A */
1541    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1542       rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1543                                           tx0[c], tx1[c],
1544                                           tx2[c], tx3[c]);
1545 }
1546
1547
1548 static void
1549 img_filter_cube_array_linear(struct tgsi_sampler *tgsi_sampler,
1550                              float s,
1551                              float t,
1552                              float p,
1553                              unsigned level,
1554                              unsigned face_id,
1555                              float *rgba)
1556 {
1557    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1558    const struct pipe_resource *texture = samp->view->texture;
1559    int width, height;
1560    int x0, y0, x1, y1, layer;
1561    float xw, yw; /* weights */
1562    union tex_tile_address addr;
1563    const float *tx0, *tx1, *tx2, *tx3;
1564    int c;
1565
1566    width = u_minify(texture->width0, level);
1567    height = u_minify(texture->height0, level);
1568
1569    assert(width > 0);
1570    assert(height > 0);
1571
1572    addr.value = 0;
1573    addr.bits.level = level;
1574
1575    samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
1576    samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1577    wrap_array_layer(p, texture->array_size, &layer);
1578
1579    tx0 = get_texel_cube_array(samp, addr, x0, y0, layer * 6 + face_id);
1580    tx1 = get_texel_cube_array(samp, addr, x1, y0, layer * 6 + face_id);
1581    tx2 = get_texel_cube_array(samp, addr, x0, y1, layer * 6 + face_id);
1582    tx3 = get_texel_cube_array(samp, addr, x1, y1, layer * 6 + face_id);
1583
1584    /* interpolate R, G, B, A */
1585    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1586       rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1587                                           tx0[c], tx1[c],
1588                                           tx2[c], tx3[c]);
1589 }
1590
1591 static void
1592 img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
1593                      float s,
1594                      float t,
1595                      float p,
1596                      unsigned level,
1597                      unsigned face_id,
1598                      float *rgba)
1599 {
1600    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1601    const struct pipe_resource *texture = samp->view->texture;
1602    int width, height, depth;
1603    int x0, x1, y0, y1, z0, z1;
1604    float xw, yw, zw; /* interpolation weights */
1605    union tex_tile_address addr;
1606    const float *tx00, *tx01, *tx02, *tx03, *tx10, *tx11, *tx12, *tx13;
1607    int c;
1608
1609    width = u_minify(texture->width0, level);
1610    height = u_minify(texture->height0, level);
1611    depth = u_minify(texture->depth0, level);
1612
1613    addr.value = 0;
1614    addr.bits.level = level;
1615
1616    assert(width > 0);
1617    assert(height > 0);
1618    assert(depth > 0);
1619
1620    samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
1621    samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1622    samp->linear_texcoord_p(p, depth,  &z0, &z1, &zw);
1623
1624
1625    tx00 = get_texel_3d(samp, addr, x0, y0, z0);
1626    tx01 = get_texel_3d(samp, addr, x1, y0, z0);
1627    tx02 = get_texel_3d(samp, addr, x0, y1, z0);
1628    tx03 = get_texel_3d(samp, addr, x1, y1, z0);
1629
1630    tx10 = get_texel_3d(samp, addr, x0, y0, z1);
1631    tx11 = get_texel_3d(samp, addr, x1, y0, z1);
1632    tx12 = get_texel_3d(samp, addr, x0, y1, z1);
1633    tx13 = get_texel_3d(samp, addr, x1, y1, z1);
1634
1635       /* interpolate R, G, B, A */
1636    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1637       rgba[TGSI_NUM_CHANNELS*c] =  lerp_3d(xw, yw, zw,
1638                                            tx00[c], tx01[c],
1639                                            tx02[c], tx03[c],
1640                                            tx10[c], tx11[c],
1641                                            tx12[c], tx13[c]);
1642 }
1643
1644
1645 /* Calculate level of detail for every fragment.
1646  * Note that lambda has already been biased by global LOD bias.
1647  */
1648 static INLINE void
1649 compute_lod(const struct pipe_sampler_state *sampler,
1650             const float biased_lambda,
1651             const float lodbias[TGSI_QUAD_SIZE],
1652             float lod[TGSI_QUAD_SIZE])
1653 {
1654    uint i;
1655
1656    for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1657       lod[i] = biased_lambda + lodbias[i];
1658       lod[i] = CLAMP(lod[i], sampler->min_lod, sampler->max_lod);
1659    }
1660 }
1661
1662
1663 static void
1664 mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
1665                   const float s[TGSI_QUAD_SIZE],
1666                   const float t[TGSI_QUAD_SIZE],
1667                   const float p[TGSI_QUAD_SIZE],
1668                   const float c0[TGSI_QUAD_SIZE],
1669                   const float c1[TGSI_QUAD_SIZE],
1670                   enum tgsi_sampler_control control,
1671                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1672 {
1673    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1674    const struct pipe_resource *texture = samp->view->texture;
1675    int j;
1676    float lod[TGSI_QUAD_SIZE];
1677
1678    if (control == tgsi_sampler_lod_bias) {
1679       float lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
1680       if (samp->key.bits.target == PIPE_TEXTURE_CUBE_ARRAY)
1681          compute_lod(samp->sampler, lambda, c1, lod);
1682       else
1683          compute_lod(samp->sampler, lambda, c0, lod);
1684    } else {
1685       assert(control == tgsi_sampler_lod_explicit);
1686
1687       if (samp->key.bits.target == PIPE_TEXTURE_CUBE_ARRAY)
1688          memcpy(lod, c1, sizeof(lod));
1689       else
1690          memcpy(lod, c0, sizeof(lod));
1691
1692    }
1693
1694    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1695       int level0 = samp->view->u.tex.first_level + (int)lod[j];
1696
1697       if (lod[j] < 0.0)
1698          samp->mag_img_filter(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level,
1699                               samp->faces[j], &rgba[0][j]);
1700
1701       else if (level0 >= texture->last_level)
1702          samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], texture->last_level,
1703                               samp->faces[j], &rgba[0][j]);
1704
1705       else {
1706          float levelBlend = frac(lod[j]);
1707          float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
1708          int c;
1709
1710          samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], level0,
1711                               samp->faces[j], &rgbax[0][0]);
1712          samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], level0+1,
1713                               samp->faces[j], &rgbax[0][1]);
1714
1715          for (c = 0; c < 4; c++) {
1716             rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
1717          }
1718       }
1719    }
1720
1721    if (DEBUG_TEX) {
1722       print_sample_4(__FUNCTION__, rgba);
1723    }
1724 }
1725
1726
1727 /**
1728  * Compute nearest mipmap level from texcoords.
1729  * Then sample the texture level for four elements of a quad.
1730  * \param c0  the LOD bias factors, or absolute LODs (depending on control)
1731  */
1732 static void
1733 mip_filter_nearest(struct tgsi_sampler *tgsi_sampler,
1734                    const float s[TGSI_QUAD_SIZE],
1735                    const float t[TGSI_QUAD_SIZE],
1736                    const float p[TGSI_QUAD_SIZE],
1737                    const float c0[TGSI_QUAD_SIZE],
1738                    const float c1[TGSI_QUAD_SIZE],
1739                    enum tgsi_sampler_control control,
1740                    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1741 {
1742    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1743    const struct pipe_resource *texture = samp->view->texture;
1744    float lod[TGSI_QUAD_SIZE];
1745    int j;
1746
1747    if (control == tgsi_sampler_lod_bias) {
1748       float lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
1749       if (samp->key.bits.target == PIPE_TEXTURE_CUBE_ARRAY)
1750          compute_lod(samp->sampler, lambda, c1, lod);
1751       else
1752          compute_lod(samp->sampler, lambda, c0, lod);
1753    } else {
1754       assert(control == tgsi_sampler_lod_explicit);
1755
1756       if (samp->key.bits.target == PIPE_TEXTURE_CUBE_ARRAY)
1757          memcpy(lod, c1, sizeof(lod));
1758       else
1759          memcpy(lod, c0, sizeof(lod));
1760    }
1761
1762    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1763       if (lod[j] < 0.0)
1764          samp->mag_img_filter(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level,
1765                               samp->faces[j], &rgba[0][j]);
1766       else {
1767          float level = samp->view->u.tex.first_level + (int)(lod[j] + 0.5F) ;
1768          level = MIN2(level, (int)texture->last_level);
1769          samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], level, samp->faces[j],
1770                               &rgba[0][j]);
1771       }
1772    }
1773
1774    if (DEBUG_TEX) {
1775       print_sample_4(__FUNCTION__, rgba);
1776    }
1777 }
1778
1779
1780 static void
1781 mip_filter_none(struct tgsi_sampler *tgsi_sampler,
1782                 const float s[TGSI_QUAD_SIZE],
1783                 const float t[TGSI_QUAD_SIZE],
1784                 const float p[TGSI_QUAD_SIZE],
1785                 const float c0[TGSI_QUAD_SIZE],
1786                 const float c1[TGSI_QUAD_SIZE],
1787                 enum tgsi_sampler_control control,
1788                 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1789 {
1790    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1791    float lod[TGSI_QUAD_SIZE];
1792    int j;
1793
1794    if (control == tgsi_sampler_lod_bias) {
1795       float lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
1796       if (samp->key.bits.target == PIPE_TEXTURE_CUBE_ARRAY)
1797          compute_lod(samp->sampler, lambda, c1, lod);
1798       else
1799          compute_lod(samp->sampler, lambda, c0, lod);
1800    } else {
1801       assert(control == tgsi_sampler_lod_explicit);
1802
1803       if (samp->key.bits.target == PIPE_TEXTURE_CUBE_ARRAY)
1804          memcpy(lod, c1, sizeof(lod));
1805       else
1806          memcpy(lod, c0, sizeof(lod));
1807    }
1808
1809    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1810       if (lod[j] < 0.0) {
1811          samp->mag_img_filter(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level,
1812                               samp->faces[j], &rgba[0][j]);
1813       }
1814       else {
1815          samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level,
1816                               samp->faces[j], &rgba[0][j]);
1817       }
1818    }
1819 }
1820
1821
1822 static void
1823 mip_filter_none_no_filter_select(struct tgsi_sampler *tgsi_sampler,
1824                                      const float s[TGSI_QUAD_SIZE],
1825                                      const float t[TGSI_QUAD_SIZE],
1826                                      const float p[TGSI_QUAD_SIZE],
1827                                      const float c0[TGSI_QUAD_SIZE],
1828                                      const float c1[TGSI_QUAD_SIZE],
1829                                      enum tgsi_sampler_control control,
1830                                      float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1831 {
1832    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1833    int j;
1834
1835    for (j = 0; j < TGSI_QUAD_SIZE; j++)
1836       samp->mag_img_filter(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level,
1837                            samp->faces[j], &rgba[0][j]);
1838 }
1839
1840
/* For anisotropic filtering */
#define WEIGHT_LUT_SIZE 1024

/* Lazily built by create_filter_table(); NULL until then. */
static float *weightLut = NULL;

/**
 * Creates the look-up table used to speed-up EWA sampling.
 *
 * Entry i holds exp(-alpha * r2) with alpha = 2 and
 * r2 = i / (WEIGHT_LUT_SIZE - 1).
 *
 * Does nothing if the table already exists.  If the allocation fails the
 * function returns with weightLut still NULL instead of writing through a
 * NULL pointer.
 */
static void
create_filter_table(void)
{
   const float alpha = 2;
   float *table;
   unsigned i;

   if (weightLut)
      return;

   table = (float *) MALLOC(WEIGHT_LUT_SIZE * sizeof(float));
   if (!table)
      return;

   for (i = 0; i < WEIGHT_LUT_SIZE; ++i) {
      float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
      float weight = (float) exp(-alpha * r2);
      table[i] = weight;
   }

   /* Publish only once every entry has been written. */
   weightLut = table;
}
1864
1865
1866 /**
1867  * Elliptical weighted average (EWA) filter for producing high quality
1868  * anisotropic filtered results.
1869  * Based on the Higher Quality Elliptical Weighted Average Filter
1870  * published by Paul S. Heckbert in his Master's Thesis
1871  * "Fundamentals of Texture Mapping and Image Warping" (1989)
1872  */
1873 static void
1874 img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler,
1875                   const float s[TGSI_QUAD_SIZE],
1876                   const float t[TGSI_QUAD_SIZE],
1877                   const float p[TGSI_QUAD_SIZE],
1878                   unsigned level,
1879                   const float dudx, const float dvdx,
1880                   const float dudy, const float dvdy,
1881                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1882 {
1883    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1884    const struct pipe_resource *texture = samp->view->texture;
1885
1886    // ??? Won't the image filters blow up if level is negative?
1887    unsigned level0 = level > 0 ? level : 0;
1888    float scaling = 1.0 / (1 << level0);
1889    int width = u_minify(texture->width0, level0);
1890    int height = u_minify(texture->height0, level0);
1891
1892    float ux = dudx * scaling;
1893    float vx = dvdx * scaling;
1894    float uy = dudy * scaling;
1895    float vy = dvdy * scaling;
1896
1897    /* compute ellipse coefficients to bound the region:
1898     * A*x*x + B*x*y + C*y*y = F.
1899     */
1900    float A = vx*vx+vy*vy+1;
1901    float B = -2*(ux*vx+uy*vy);
1902    float C = ux*ux+uy*uy+1;
1903    float F = A*C-B*B/4.0;
1904
1905    /* check if it is an ellipse */
1906    /* ASSERT(F > 0.0); */
1907
1908    /* Compute the ellipse's (u,v) bounding box in texture space */
1909    float d = -B*B+4.0*C*A;
1910    float box_u = 2.0 / d * sqrt(d*C*F); /* box_u -> half of bbox with   */
1911    float box_v = 2.0 / d * sqrt(A*d*F); /* box_v -> half of bbox height */
1912
1913    float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
1914    float s_buffer[TGSI_QUAD_SIZE];
1915    float t_buffer[TGSI_QUAD_SIZE];
1916    float weight_buffer[TGSI_QUAD_SIZE];
1917    unsigned buffer_next;
1918    int j;
1919    float den; /* = 0.0F; */
1920    float ddq;
1921    float U; /* = u0 - tex_u; */
1922    int v;
1923
1924    /* Scale ellipse formula to directly index the Filter Lookup Table.
1925     * i.e. scale so that F = WEIGHT_LUT_SIZE-1
1926     */
1927    double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F;
1928    A *= formScale;
1929    B *= formScale;
1930    C *= formScale;
1931    /* F *= formScale; */ /* no need to scale F as we don't use it below here */
1932
1933    /* For each quad, the du and dx values are the same and so the ellipse is
1934     * also the same. Note that texel/image access can only be performed using
1935     * a quad, i.e. it is not possible to get the pixel value for a single
1936     * tex coord. In order to have a better performance, the access is buffered
1937     * using the s_buffer/t_buffer and weight_buffer. Only when the buffer is
1938     * full, then the pixel values are read from the image.
1939     */
1940    ddq = 2 * A;
1941
1942    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1943       /* Heckbert MS thesis, p. 59; scan over the bounding box of the ellipse
1944        * and incrementally update the value of Ax^2+Bxy*Cy^2; when this
1945        * value, q, is less than F, we're inside the ellipse
1946        */
1947       float tex_u = -0.5F + s[j] * texture->width0 * scaling;
1948       float tex_v = -0.5F + t[j] * texture->height0 * scaling;
1949
1950       int u0 = (int) floorf(tex_u - box_u);
1951       int u1 = (int) ceilf(tex_u + box_u);
1952       int v0 = (int) floorf(tex_v - box_v);
1953       int v1 = (int) ceilf(tex_v + box_v);
1954
1955       float num[4] = {0.0F, 0.0F, 0.0F, 0.0F};
1956       buffer_next = 0;
1957       den = 0;
1958       U = u0 - tex_u;
1959       for (v = v0; v <= v1; ++v) {
1960          float V = v - tex_v;
1961          float dq = A * (2 * U + 1) + B * V;
1962          float q = (C * V + B * U) * V + A * U * U;
1963
1964          int u;
1965          for (u = u0; u <= u1; ++u) {
1966             /* Note that the ellipse has been pre-scaled so F =
1967              * WEIGHT_LUT_SIZE - 1
1968              */
1969             if (q < WEIGHT_LUT_SIZE) {
1970                /* as a LUT is used, q must never be negative;
1971                 * should not happen, though
1972                 */
1973                const int qClamped = q >= 0.0F ? q : 0;
1974                float weight = weightLut[qClamped];
1975
1976                weight_buffer[buffer_next] = weight;
1977                s_buffer[buffer_next] = u / ((float) width);
1978                t_buffer[buffer_next] = v / ((float) height);
1979
1980                buffer_next++;
1981                if (buffer_next == TGSI_QUAD_SIZE) {
1982                   /* 4 texel coords are in the buffer -> read it now */
1983                   unsigned jj;
1984                   /* it is assumed that samp->min_img_filter is set to
1985                    * img_filter_2d_nearest or one of the
1986                    * accelerated img_filter_2d_nearest_XXX functions.
1987                    */
1988                   for (jj = 0; jj < buffer_next; jj++) {
1989                      samp->min_img_filter(tgsi_sampler, s_buffer[jj], t_buffer[jj], p[jj],
1990                                           level, samp->faces[j], &rgba_temp[0][jj]);
1991                      num[0] += weight_buffer[jj] * rgba_temp[0][jj];
1992                      num[1] += weight_buffer[jj] * rgba_temp[1][jj];
1993                      num[2] += weight_buffer[jj] * rgba_temp[2][jj];
1994                      num[3] += weight_buffer[jj] * rgba_temp[3][jj];
1995                   }
1996
1997                   buffer_next = 0;
1998                }
1999
2000                den += weight;
2001             }
2002             q += dq;
2003             dq += ddq;
2004          }
2005       }
2006
2007       /* if the tex coord buffer contains unread values, we will read
2008        * them now.
2009        */
2010       if (buffer_next > 0) {
2011          unsigned jj;
2012          /* it is assumed that samp->min_img_filter is set to
2013           * img_filter_2d_nearest or one of the
2014           * accelerated img_filter_2d_nearest_XXX functions.
2015           */
2016          for (jj = 0; jj < buffer_next; jj++) {
2017             samp->min_img_filter(tgsi_sampler, s_buffer[jj], t_buffer[jj], p[jj], level,
2018                                  samp->faces[j], &rgba_temp[0][jj]);
2019             num[0] += weight_buffer[jj] * rgba_temp[0][jj];
2020             num[1] += weight_buffer[jj] * rgba_temp[1][jj];
2021             num[2] += weight_buffer[jj] * rgba_temp[2][jj];
2022             num[3] += weight_buffer[jj] * rgba_temp[3][jj];
2023          }
2024       }
2025
2026       if (den <= 0.0F) {
2027          /* Reaching this place would mean that no pixels intersected
2028           * the ellipse.  This should never happen because the filter
2029           * we use always intersects at least one pixel.
2030           */
2031
2032          /*rgba[0]=0;
2033          rgba[1]=0;
2034          rgba[2]=0;
2035          rgba[3]=0;*/
2036          /* not enough pixels in resampling, resort to direct interpolation */
2037          samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], level, samp->faces[j],
2038                               &rgba_temp[0][j]);
2039          den = 1;
2040          num[0] = rgba_temp[0][j];
2041          num[1] = rgba_temp[1][j];
2042          num[2] = rgba_temp[2][j];
2043          num[3] = rgba_temp[3][j];
2044       }
2045
2046       rgba[0][j] = num[0] / den;
2047       rgba[1][j] = num[1] / den;
2048       rgba[2][j] = num[2] / den;
2049       rgba[3][j] = num[3] / den;
2050    }
2051 }
2052
2053
2054 /**
2055  * Sample 2D texture using an anisotropic filter.
2056  */
2057 static void
2058 mip_filter_linear_aniso(struct tgsi_sampler *tgsi_sampler,
2059                         const float s[TGSI_QUAD_SIZE],
2060                         const float t[TGSI_QUAD_SIZE],
2061                         const float p[TGSI_QUAD_SIZE],
2062                         const float c0[TGSI_QUAD_SIZE],
2063                         const float c1[TGSI_QUAD_SIZE],
2064                         enum tgsi_sampler_control control,
2065                         float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2066 {
2067    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
2068    const struct pipe_resource *texture = samp->view->texture;
2069    int level0;
2070    float lambda;
2071    float lod[TGSI_QUAD_SIZE];
2072
2073    float s_to_u = u_minify(texture->width0, samp->view->u.tex.first_level);
2074    float t_to_v = u_minify(texture->height0, samp->view->u.tex.first_level);
2075    float dudx = (s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
2076    float dudy = (s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]) * s_to_u;
2077    float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
2078    float dvdy = (t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]) * t_to_v;
2079
2080    if (control == tgsi_sampler_lod_bias) {
2081       /* note: instead of working with Px and Py, we will use the
2082        * squared length instead, to avoid sqrt.
2083        */
2084       float Px2 = dudx * dudx + dvdx * dvdx;
2085       float Py2 = dudy * dudy + dvdy * dvdy;
2086
2087       float Pmax2;
2088       float Pmin2;
2089       float e;
2090       const float maxEccentricity = samp->sampler->max_anisotropy * samp->sampler->max_anisotropy;
2091
2092       if (Px2 < Py2) {
2093          Pmax2 = Py2;
2094          Pmin2 = Px2;
2095       }
2096       else {
2097          Pmax2 = Px2;
2098          Pmin2 = Py2;
2099       }
2100
2101       /* if the eccentricity of the ellipse is too big, scale up the shorter
2102        * of the two vectors to limit the maximum amount of work per pixel
2103        */
2104       e = Pmax2 / Pmin2;
2105       if (e > maxEccentricity) {
2106          /* float s=e / maxEccentricity;
2107             minor[0] *= s;
2108             minor[1] *= s;
2109             Pmin2 *= s; */
2110          Pmin2 = Pmax2 / maxEccentricity;
2111       }
2112
2113       /* note: we need to have Pmin=sqrt(Pmin2) here, but we can avoid
2114        * this since 0.5*log(x) = log(sqrt(x))
2115        */
2116       lambda = 0.5F * util_fast_log2(Pmin2) + samp->sampler->lod_bias;
2117       compute_lod(samp->sampler, lambda, c0, lod);
2118    }
2119    else {
2120       assert(control == tgsi_sampler_lod_explicit);
2121
2122       memcpy(lod, c0, sizeof(lod));
2123    }
2124
2125    /* XXX: Take into account all lod values.
2126     */
2127    lambda = lod[0];
2128    level0 = samp->view->u.tex.first_level + (int)lambda;
2129
2130    /* If the ellipse covers the whole image, we can
2131     * simply return the average of the whole image.
2132     */
2133    if (level0 >= (int) texture->last_level) {
2134       int j;
2135       for (j = 0; j < TGSI_QUAD_SIZE; j++)
2136          samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], texture->last_level,
2137                               samp->faces[j], &rgba[0][j]);
2138    }
2139    else {
2140       /* don't bother interpolating between multiple LODs; it doesn't
2141        * seem to be worth the extra running time.
2142        */
2143       img_filter_2d_ewa(tgsi_sampler, s, t, p, level0,
2144                         dudx, dvdx, dudy, dvdy, rgba);
2145    }
2146
2147    if (DEBUG_TEX) {
2148       print_sample_4(__FUNCTION__, rgba);
2149    }
2150 }
2151
2152
2153 /**
2154  * Specialized version of mip_filter_linear with hard-wired calls to
2155  * 2d lambda calculation and 2d_linear_repeat_POT img filters.
2156  */
2157 static void
2158 mip_filter_linear_2d_linear_repeat_POT(
2159    struct tgsi_sampler *tgsi_sampler,
2160    const float s[TGSI_QUAD_SIZE],
2161    const float t[TGSI_QUAD_SIZE],
2162    const float p[TGSI_QUAD_SIZE],
2163    const float c0[TGSI_QUAD_SIZE],
2164    const float c1[TGSI_QUAD_SIZE],
2165    enum tgsi_sampler_control control,
2166    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2167 {
2168    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
2169    const struct pipe_resource *texture = samp->view->texture;
2170    int j;
2171    float lambda;
2172    float lod[TGSI_QUAD_SIZE];
2173
2174    if (control == tgsi_sampler_lod_bias) {
2175       lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
2176       compute_lod(samp->sampler, lambda, c0, lod);
2177    } else {
2178       assert(control == tgsi_sampler_lod_explicit);
2179
2180       memcpy(lod, c0, sizeof(lod));
2181    }
2182
2183    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2184       int level0 = samp->view->u.tex.first_level + (int)lod[j];
2185
2186       /* Catches both negative and large values of level0:
2187        */
2188       if ((unsigned)level0 >= texture->last_level) {
2189          if (level0 < 0)
2190             img_filter_2d_linear_repeat_POT(tgsi_sampler, s[j], t[j], p[j],
2191                                             samp->view->u.tex.first_level,
2192                                             samp->faces[j], &rgba[0][j]);
2193          else
2194             img_filter_2d_linear_repeat_POT(tgsi_sampler, s[j], t[j], p[j],
2195                                             samp->view->texture->last_level,
2196                                             samp->faces[j], &rgba[0][j]);
2197
2198       }
2199       else {
2200          float levelBlend = frac(lod[j]);
2201          float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2202          int c;
2203
2204          img_filter_2d_linear_repeat_POT(tgsi_sampler, s[j], t[j], p[j], level0,
2205                                          samp->faces[j], &rgbax[0][0]);
2206          img_filter_2d_linear_repeat_POT(tgsi_sampler, s[j], t[j], p[j], level0+1,
2207                                          samp->faces[j], &rgbax[0][1]);
2208
2209          for (c = 0; c < TGSI_NUM_CHANNELS; c++)
2210             rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
2211       }
2212    }
2213
2214    if (DEBUG_TEX) {
2215       print_sample_4(__FUNCTION__, rgba);
2216    }
2217 }
2218
2219
2220 /**
2221  * Do shadow/depth comparisons.
2222  */
2223 static void
2224 sample_compare(struct tgsi_sampler *tgsi_sampler,
2225                const float s[TGSI_QUAD_SIZE],
2226                const float t[TGSI_QUAD_SIZE],
2227                const float p[TGSI_QUAD_SIZE],
2228                const float c0[TGSI_QUAD_SIZE],
2229                const float c1[TGSI_QUAD_SIZE],
2230                enum tgsi_sampler_control control,
2231                float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2232 {
2233    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
2234    const struct pipe_sampler_state *sampler = samp->sampler;
2235    int j, k0, k1, k2, k3;
2236    float val;
2237    float pc0, pc1, pc2, pc3;
2238
2239    samp->mip_filter(tgsi_sampler, s, t, p, c0, c1, control, rgba);
2240
2241    /**
2242     * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
2243     * for 2D Array texture we need to use the 'c0' (aka Q).
2244     * When we sampled the depth texture, the depth value was put into all
2245     * RGBA channels.  We look at the red channel here.
2246     */
2247
2248    if (samp->view->texture->target == PIPE_TEXTURE_2D_ARRAY ||
2249        samp->view->texture->target == PIPE_TEXTURE_CUBE) {
2250       pc0 = CLAMP(c0[0], 0.0F, 1.0F);
2251       pc1 = CLAMP(c0[1], 0.0F, 1.0F);
2252       pc2 = CLAMP(c0[2], 0.0F, 1.0F);
2253       pc3 = CLAMP(c0[3], 0.0F, 1.0F);
2254    } else if (samp->view->texture->target == PIPE_TEXTURE_CUBE_ARRAY) {
2255       pc0 = CLAMP(c1[0], 0.0F, 1.0F);
2256       pc1 = CLAMP(c1[1], 0.0F, 1.0F);
2257       pc2 = CLAMP(c1[2], 0.0F, 1.0F);
2258       pc3 = CLAMP(c1[3], 0.0F, 1.0F);
2259    } else {
2260       pc0 = CLAMP(p[0], 0.0F, 1.0F);
2261       pc1 = CLAMP(p[1], 0.0F, 1.0F);
2262       pc2 = CLAMP(p[2], 0.0F, 1.0F);
2263       pc3 = CLAMP(p[3], 0.0F, 1.0F);
2264    }
2265    /* compare four texcoords vs. four texture samples */
2266    switch (sampler->compare_func) {
2267    case PIPE_FUNC_LESS:
2268       k0 = pc0 < rgba[0][0];
2269       k1 = pc1 < rgba[0][1];
2270       k2 = pc2 < rgba[0][2];
2271       k3 = pc3 < rgba[0][3];
2272       break;
2273    case PIPE_FUNC_LEQUAL:
2274       k0 = pc0 <= rgba[0][0];
2275       k1 = pc1 <= rgba[0][1];
2276       k2 = pc2 <= rgba[0][2];
2277       k3 = pc3 <= rgba[0][3];
2278       break;
2279    case PIPE_FUNC_GREATER:
2280       k0 = pc0 > rgba[0][0];
2281       k1 = pc1 > rgba[0][1];
2282       k2 = pc2 > rgba[0][2];
2283       k3 = pc3 > rgba[0][3];
2284       break;
2285    case PIPE_FUNC_GEQUAL:
2286       k0 = pc0 >= rgba[0][0];
2287       k1 = pc1 >= rgba[0][1];
2288       k2 = pc2 >= rgba[0][2];
2289       k3 = pc3 >= rgba[0][3];
2290       break;
2291    case PIPE_FUNC_EQUAL:
2292       k0 = pc0 == rgba[0][0];
2293       k1 = pc1 == rgba[0][1];
2294       k2 = pc2 == rgba[0][2];
2295       k3 = pc3 == rgba[0][3];
2296       break;
2297    case PIPE_FUNC_NOTEQUAL:
2298       k0 = pc0 != rgba[0][0];
2299       k1 = pc1 != rgba[0][1];
2300       k2 = pc2 != rgba[0][2];
2301       k3 = pc3 != rgba[0][3];
2302       break;
2303    case PIPE_FUNC_ALWAYS:
2304       k0 = k1 = k2 = k3 = 1;
2305       break;
2306    case PIPE_FUNC_NEVER:
2307       k0 = k1 = k2 = k3 = 0;
2308       break;
2309    default:
2310       k0 = k1 = k2 = k3 = 0;
2311       assert(0);
2312       break;
2313    }
2314
2315    if (sampler->mag_img_filter == PIPE_TEX_FILTER_LINEAR) {
2316       /* convert four pass/fail values to an intensity in [0,1] */
2317       val = 0.25F * (k0 + k1 + k2 + k3);
2318
2319       /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
2320       for (j = 0; j < 4; j++) {
2321          rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
2322          rgba[3][j] = 1.0F;
2323       }
2324    } else {
2325       for (j = 0; j < 4; j++) {
2326          rgba[0][j] = k0;
2327          rgba[1][j] = k1;
2328          rgba[2][j] = k2;
2329          rgba[3][j] = 1.0F;
2330       }
2331    }
2332 }
2333
2334
2335 /**
2336  * Use 3D texcoords to choose a cube face, then sample the 2D cube faces.
2337  * Put face info into the sampler faces[] array.
2338  */
2339 static void
2340 sample_cube(struct tgsi_sampler *tgsi_sampler,
2341             const float s[TGSI_QUAD_SIZE],
2342             const float t[TGSI_QUAD_SIZE],
2343             const float p[TGSI_QUAD_SIZE],
2344             const float c0[TGSI_QUAD_SIZE],
2345             const float c1[TGSI_QUAD_SIZE],
2346             enum tgsi_sampler_control control,
2347             float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2348 {
2349    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
2350    unsigned j;
2351    float ssss[4], tttt[4];
2352
2353    /* Not actually used, but the intermediate steps that do the
2354     * dereferencing don't know it.
2355     */
2356    static float pppp[4] = { 0, 0, 0, 0 };
2357
2358    pppp[0] = c0[0];
2359    pppp[1] = c0[1];
2360    pppp[2] = c0[2];
2361    pppp[3] = c0[3];
2362    /*
2363      major axis
2364      direction    target                             sc     tc    ma
2365      ----------   -------------------------------    ---    ---   ---
2366      +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
2367      -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
2368      +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
2369      -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
2370      +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
2371      -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
2372    */
2373
2374    /* Choose the cube face and compute new s/t coords for the 2D face.
2375     *
2376     * Use the same cube face for all four pixels in the quad.
2377     *
2378     * This isn't ideal, but if we want to use a different cube face
2379     * per pixel in the quad, we'd have to also compute the per-face
2380     * LOD here too.  That's because the four post-face-selection
2381     * texcoords are no longer related to each other (they're
2382     * per-face!)  so we can't use subtraction to compute the partial
2383     * deriviates to compute the LOD.  Doing so (near cube edges
2384     * anyway) gives us pretty much random values.
2385     */
2386    {
2387       /* use the average of the four pixel's texcoords to choose the face */
2388       const float rx = 0.25F * (s[0] + s[1] + s[2] + s[3]);
2389       const float ry = 0.25F * (t[0] + t[1] + t[2] + t[3]);
2390       const float rz = 0.25F * (p[0] + p[1] + p[2] + p[3]);
2391       const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
2392
2393       if (arx >= ary && arx >= arz) {
2394          float sign = (rx >= 0.0F) ? 1.0F : -1.0F;
2395          uint face = (rx >= 0.0F) ? PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X;
2396          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2397             const float ima = -0.5F / fabsf(s[j]);
2398             ssss[j] = sign *  p[j] * ima + 0.5F;
2399             tttt[j] =         t[j] * ima + 0.5F;
2400             samp->faces[j] = face;
2401          }
2402       }
2403       else if (ary >= arx && ary >= arz) {
2404          float sign = (ry >= 0.0F) ? 1.0F : -1.0F;
2405          uint face = (ry >= 0.0F) ? PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y;
2406          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2407             const float ima = -0.5F / fabsf(t[j]);
2408             ssss[j] =        -s[j] * ima + 0.5F;
2409             tttt[j] = sign * -p[j] * ima + 0.5F;
2410             samp->faces[j] = face;
2411          }
2412       }
2413       else {
2414          float sign = (rz >= 0.0F) ? 1.0F : -1.0F;
2415          uint face = (rz >= 0.0F) ? PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z;
2416          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2417             const float ima = -0.5F / fabsf(p[j]);
2418             ssss[j] = sign * -s[j] * ima + 0.5F;
2419             tttt[j] =         t[j] * ima + 0.5F;
2420             samp->faces[j] = face;
2421          }
2422       }
2423    }
2424
2425    /* In our little pipeline, the compare stage is next.  If compare
2426     * is not active, this will point somewhere deeper into the
2427     * pipeline, eg. to mip_filter or even img_filter.
2428     */
2429    samp->compare(tgsi_sampler, ssss, tttt, pppp, c0, c1, control, rgba);
2430 }
2431
2432
2433 static void
2434 do_swizzling(const struct sp_sampler_variant *samp,
2435              float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
2436              float out[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2437 {
2438    int j;
2439    const unsigned swizzle_r = samp->key.bits.swizzle_r;
2440    const unsigned swizzle_g = samp->key.bits.swizzle_g;
2441    const unsigned swizzle_b = samp->key.bits.swizzle_b;
2442    const unsigned swizzle_a = samp->key.bits.swizzle_a;
2443
2444    switch (swizzle_r) {
2445    case PIPE_SWIZZLE_ZERO:
2446       for (j = 0; j < 4; j++)
2447          out[0][j] = 0.0f;
2448       break;
2449    case PIPE_SWIZZLE_ONE:
2450       for (j = 0; j < 4; j++)
2451          out[0][j] = 1.0f;
2452       break;
2453    default:
2454       assert(swizzle_r < 4);
2455       for (j = 0; j < 4; j++)
2456          out[0][j] = in[swizzle_r][j];
2457    }
2458
2459    switch (swizzle_g) {
2460    case PIPE_SWIZZLE_ZERO:
2461       for (j = 0; j < 4; j++)
2462          out[1][j] = 0.0f;
2463       break;
2464    case PIPE_SWIZZLE_ONE:
2465       for (j = 0; j < 4; j++)
2466          out[1][j] = 1.0f;
2467       break;
2468    default:
2469       assert(swizzle_g < 4);
2470       for (j = 0; j < 4; j++)
2471          out[1][j] = in[swizzle_g][j];
2472    }
2473
2474    switch (swizzle_b) {
2475    case PIPE_SWIZZLE_ZERO:
2476       for (j = 0; j < 4; j++)
2477          out[2][j] = 0.0f;
2478       break;
2479    case PIPE_SWIZZLE_ONE:
2480       for (j = 0; j < 4; j++)
2481          out[2][j] = 1.0f;
2482       break;
2483    default:
2484       assert(swizzle_b < 4);
2485       for (j = 0; j < 4; j++)
2486          out[2][j] = in[swizzle_b][j];
2487    }
2488
2489    switch (swizzle_a) {
2490    case PIPE_SWIZZLE_ZERO:
2491       for (j = 0; j < 4; j++)
2492          out[3][j] = 0.0f;
2493       break;
2494    case PIPE_SWIZZLE_ONE:
2495       for (j = 0; j < 4; j++)
2496          out[3][j] = 1.0f;
2497       break;
2498    default:
2499       assert(swizzle_a < 4);
2500       for (j = 0; j < 4; j++)
2501          out[3][j] = in[swizzle_a][j];
2502    }
2503 }
2504
2505
2506 static void
2507 sample_swizzle(struct tgsi_sampler *tgsi_sampler,
2508                const float s[TGSI_QUAD_SIZE],
2509                const float t[TGSI_QUAD_SIZE],
2510                const float p[TGSI_QUAD_SIZE],
2511                const float c0[TGSI_QUAD_SIZE],
2512                const float c1[TGSI_QUAD_SIZE],
2513                enum tgsi_sampler_control control,
2514                float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2515 {
2516    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
2517    float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2518
2519    samp->sample_target(tgsi_sampler, s, t, p, c0, c1, control, rgba_temp);
2520
2521    do_swizzling(samp, rgba_temp, rgba);
2522 }
2523
2524
2525 static wrap_nearest_func
2526 get_nearest_unorm_wrap(unsigned mode)
2527 {
2528    switch (mode) {
2529    case PIPE_TEX_WRAP_CLAMP:
2530       return wrap_nearest_unorm_clamp;
2531    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2532       return wrap_nearest_unorm_clamp_to_edge;
2533    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2534       return wrap_nearest_unorm_clamp_to_border;
2535    default:
2536       assert(0);
2537       return wrap_nearest_unorm_clamp;
2538    }
2539 }
2540
2541
2542 static wrap_nearest_func
2543 get_nearest_wrap(unsigned mode)
2544 {
2545    switch (mode) {
2546    case PIPE_TEX_WRAP_REPEAT:
2547       return wrap_nearest_repeat;
2548    case PIPE_TEX_WRAP_CLAMP:
2549       return wrap_nearest_clamp;
2550    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2551       return wrap_nearest_clamp_to_edge;
2552    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2553       return wrap_nearest_clamp_to_border;
2554    case PIPE_TEX_WRAP_MIRROR_REPEAT:
2555       return wrap_nearest_mirror_repeat;
2556    case PIPE_TEX_WRAP_MIRROR_CLAMP:
2557       return wrap_nearest_mirror_clamp;
2558    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
2559       return wrap_nearest_mirror_clamp_to_edge;
2560    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2561       return wrap_nearest_mirror_clamp_to_border;
2562    default:
2563       assert(0);
2564       return wrap_nearest_repeat;
2565    }
2566 }
2567
2568
2569 static wrap_linear_func
2570 get_linear_unorm_wrap(unsigned mode)
2571 {
2572    switch (mode) {
2573    case PIPE_TEX_WRAP_CLAMP:
2574       return wrap_linear_unorm_clamp;
2575    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2576       return wrap_linear_unorm_clamp_to_edge;
2577    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2578       return wrap_linear_unorm_clamp_to_border;
2579    default:
2580       assert(0);
2581       return wrap_linear_unorm_clamp;
2582    }
2583 }
2584
2585
2586 static wrap_linear_func
2587 get_linear_wrap(unsigned mode)
2588 {
2589    switch (mode) {
2590    case PIPE_TEX_WRAP_REPEAT:
2591       return wrap_linear_repeat;
2592    case PIPE_TEX_WRAP_CLAMP:
2593       return wrap_linear_clamp;
2594    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2595       return wrap_linear_clamp_to_edge;
2596    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2597       return wrap_linear_clamp_to_border;
2598    case PIPE_TEX_WRAP_MIRROR_REPEAT:
2599       return wrap_linear_mirror_repeat;
2600    case PIPE_TEX_WRAP_MIRROR_CLAMP:
2601       return wrap_linear_mirror_clamp;
2602    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
2603       return wrap_linear_mirror_clamp_to_edge;
2604    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2605       return wrap_linear_mirror_clamp_to_border;
2606    default:
2607       assert(0);
2608       return wrap_linear_repeat;
2609    }
2610 }
2611
2612
2613 /**
2614  * Is swizzling needed for the given state key?
2615  */
2616 static INLINE bool
2617 any_swizzle(union sp_sampler_key key)
2618 {
2619    return (key.bits.swizzle_r != PIPE_SWIZZLE_RED ||
2620            key.bits.swizzle_g != PIPE_SWIZZLE_GREEN ||
2621            key.bits.swizzle_b != PIPE_SWIZZLE_BLUE ||
2622            key.bits.swizzle_a != PIPE_SWIZZLE_ALPHA);
2623 }
2624
2625
2626 static compute_lambda_func
2627 get_lambda_func(const union sp_sampler_key key)
2628 {
2629    if (key.bits.processor == TGSI_PROCESSOR_VERTEX)
2630       return compute_lambda_vert;
2631
2632    switch (key.bits.target) {
2633    case PIPE_BUFFER:
2634    case PIPE_TEXTURE_1D:
2635    case PIPE_TEXTURE_1D_ARRAY:
2636       return compute_lambda_1d;
2637    case PIPE_TEXTURE_2D:
2638    case PIPE_TEXTURE_2D_ARRAY:
2639    case PIPE_TEXTURE_RECT:
2640    case PIPE_TEXTURE_CUBE:
2641    case PIPE_TEXTURE_CUBE_ARRAY:
2642       return compute_lambda_2d;
2643    case PIPE_TEXTURE_3D:
2644       return compute_lambda_3d;
2645    default:
2646       assert(0);
2647       return compute_lambda_1d;
2648    }
2649 }
2650
2651
2652 static img_filter_func
2653 get_img_filter(const union sp_sampler_key key,
2654                unsigned filter,
2655                const struct pipe_sampler_state *sampler)
2656 {
2657    switch (key.bits.target) {
2658    case PIPE_BUFFER:
2659    case PIPE_TEXTURE_1D:
2660       if (filter == PIPE_TEX_FILTER_NEAREST)
2661          return img_filter_1d_nearest;
2662       else
2663          return img_filter_1d_linear;
2664       break;
2665    case PIPE_TEXTURE_1D_ARRAY:
2666       if (filter == PIPE_TEX_FILTER_NEAREST)
2667          return img_filter_1d_array_nearest;
2668       else
2669          return img_filter_1d_array_linear;
2670       break;
2671    case PIPE_TEXTURE_2D:
2672    case PIPE_TEXTURE_RECT:
2673       /* Try for fast path:
2674        */
2675       if (key.bits.is_pot &&
2676           sampler->wrap_s == sampler->wrap_t &&
2677           sampler->normalized_coords)
2678       {
2679          switch (sampler->wrap_s) {
2680          case PIPE_TEX_WRAP_REPEAT:
2681             switch (filter) {
2682             case PIPE_TEX_FILTER_NEAREST:
2683                return img_filter_2d_nearest_repeat_POT;
2684             case PIPE_TEX_FILTER_LINEAR:
2685                return img_filter_2d_linear_repeat_POT;
2686             default:
2687                break;
2688             }
2689             break;
2690          case PIPE_TEX_WRAP_CLAMP:
2691             switch (filter) {
2692             case PIPE_TEX_FILTER_NEAREST:
2693                return img_filter_2d_nearest_clamp_POT;
2694             default:
2695                break;
2696             }
2697          }
2698       }
2699       /* Otherwise use default versions:
2700        */
2701       if (filter == PIPE_TEX_FILTER_NEAREST)
2702          return img_filter_2d_nearest;
2703       else
2704          return img_filter_2d_linear;
2705       break;
2706    case PIPE_TEXTURE_2D_ARRAY:
2707       if (filter == PIPE_TEX_FILTER_NEAREST)
2708          return img_filter_2d_array_nearest;
2709       else
2710          return img_filter_2d_array_linear;
2711       break;
2712    case PIPE_TEXTURE_CUBE:
2713       if (filter == PIPE_TEX_FILTER_NEAREST)
2714          return img_filter_cube_nearest;
2715       else
2716          return img_filter_cube_linear;
2717       break;
2718    case PIPE_TEXTURE_CUBE_ARRAY:
2719       if (filter == PIPE_TEX_FILTER_NEAREST)
2720          return img_filter_cube_array_nearest;
2721       else
2722          return img_filter_cube_array_linear;
2723       break;
2724    case PIPE_TEXTURE_3D:
2725       if (filter == PIPE_TEX_FILTER_NEAREST)
2726          return img_filter_3d_nearest;
2727       else
2728          return img_filter_3d_linear;
2729       break;
2730    default:
2731       assert(0);
2732       return img_filter_1d_nearest;
2733    }
2734 }
2735
2736
2737 /**
2738  * Bind the given texture object and texture cache to the sampler variant.
2739  */
2740 void
2741 sp_sampler_variant_bind_view( struct sp_sampler_variant *samp,
2742                               struct softpipe_tex_tile_cache *tex_cache,
2743                               const struct pipe_sampler_view *view )
2744 {
2745    const struct pipe_resource *texture = view->texture;
2746
2747    samp->view = view;
2748    samp->cache = tex_cache;
2749    samp->xpot = util_logbase2( texture->width0 );
2750    samp->ypot = util_logbase2( texture->height0 );
2751 }
2752
2753
/**
 * Release a sampler variant.  Only the variant structure itself is freed;
 * nothing it points to is touched here.
 */
void
sp_sampler_variant_destroy( struct sp_sampler_variant *samp )
{
   FREE(samp);
}
2759
2760
2761 static void
2762 sample_get_dims(struct tgsi_sampler *tgsi_sampler, int level,
2763                 int dims[4])
2764 {
2765     struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
2766     const struct pipe_sampler_view *view = samp->view;
2767     const struct pipe_resource *texture = view->texture;
2768
2769     /* undefined according to EXT_gpu_program */
2770     level += view->u.tex.first_level;
2771     if (level > view->u.tex.last_level)
2772         return;
2773
2774     dims[0] = u_minify(texture->width0, level);
2775
2776     switch(texture->target) {
2777     case PIPE_TEXTURE_1D_ARRAY:
2778        dims[1] = texture->array_size;
2779        /* fallthrough */
2780     case PIPE_TEXTURE_1D:
2781        return;
2782     case PIPE_TEXTURE_2D_ARRAY:
2783        dims[2] = texture->array_size;
2784        /* fallthrough */
2785     case PIPE_TEXTURE_2D:
2786     case PIPE_TEXTURE_CUBE:
2787     case PIPE_TEXTURE_RECT:
2788        dims[1] = u_minify(texture->height0, level);
2789        return;
2790     case PIPE_TEXTURE_3D:
2791        dims[1] = u_minify(texture->height0, level);
2792        dims[2] = u_minify(texture->depth0, level);
2793        return;
2794     case PIPE_TEXTURE_CUBE_ARRAY:
2795        dims[1] = u_minify(texture->height0, level);
2796        dims[2] = texture->array_size / 6;
2797        break;
2798     case PIPE_BUFFER:
2799        dims[0] /= util_format_get_blocksize(view->format);
2800        return;
2801     default:
2802        assert(!"unexpected texture target in sample_get_dims()");
2803        return;
2804     }
2805 }
2806
2807 /**
2808  * This function is only used for getting unfiltered texels via the
2809  * TXF opcode.  The GL spec says that out-of-bounds texel fetches
2810  * produce undefined results.  Instead of crashing, lets just clamp
2811  * coords to the texture image size.
2812  */
2813 static void
2814 sample_get_texels(struct tgsi_sampler *tgsi_sampler,
2815                   const int v_i[TGSI_QUAD_SIZE],
2816                   const int v_j[TGSI_QUAD_SIZE],
2817                   const int v_k[TGSI_QUAD_SIZE],
2818                   const int lod[TGSI_QUAD_SIZE],
2819                   const int8_t offset[3],
2820                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2821 {
2822    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
2823    union tex_tile_address addr;
2824    const struct pipe_resource *texture = samp->view->texture;
2825    int j, c;
2826    const float *tx;
2827    const bool need_swizzle = any_swizzle(samp->key);
2828    int width, height, depth, layers;
2829
2830    addr.value = 0;
2831    /* TODO write a better test for LOD */
2832    addr.bits.level = lod[0];
2833
2834    width = u_minify(texture->width0, addr.bits.level);
2835    height = u_minify(texture->height0, addr.bits.level);
2836    depth = u_minify(texture->depth0, addr.bits.level);
2837    layers = texture->array_size;
2838
2839    switch(texture->target) {
2840    case PIPE_BUFFER:
2841    case PIPE_TEXTURE_1D:
2842       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2843          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2844          tx = get_texel_2d(samp, addr, x, 0);
2845          for (c = 0; c < 4; c++) {
2846             rgba[c][j] = tx[c];
2847          }
2848       }
2849       break;
2850    case PIPE_TEXTURE_1D_ARRAY:
2851       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2852          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2853          int y = CLAMP(v_j[j], 0, layers - 1);
2854          tx = get_texel_1d_array(samp, addr, x, y);
2855          for (c = 0; c < 4; c++) {
2856             rgba[c][j] = tx[c];
2857          }
2858       }
2859       break;
2860    case PIPE_TEXTURE_2D:
2861    case PIPE_TEXTURE_RECT:
2862       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2863          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2864          int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
2865          tx = get_texel_2d(samp, addr, x, y);
2866          for (c = 0; c < 4; c++) {
2867             rgba[c][j] = tx[c];
2868          }
2869       }
2870       break;
2871    case PIPE_TEXTURE_2D_ARRAY:
2872       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2873          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2874          int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
2875          int layer = CLAMP(v_k[j], 0, layers - 1);
2876          tx = get_texel_2d_array(samp, addr, x, y, layer);
2877          for (c = 0; c < 4; c++) {
2878             rgba[c][j] = tx[c];
2879          }
2880       }
2881       break;
2882    case PIPE_TEXTURE_3D:
2883       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2884          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2885          int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
2886          int z = CLAMP(v_k[j] + offset[2], 0, depth - 1);
2887
2888          tx = get_texel_3d(samp, addr, x, y, z);
2889          for (c = 0; c < 4; c++) {
2890             rgba[c][j] = tx[c];
2891          }
2892       }
2893       break;
2894    case PIPE_TEXTURE_CUBE: /* TXF can't work on CUBE according to spec */
2895    default:
2896       assert(!"Unknown or CUBE texture type in TXF processing\n");
2897       break;
2898    }
2899
2900    if (need_swizzle) {
2901       float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2902       memcpy(rgba_temp, rgba, sizeof(rgba_temp));
2903       do_swizzling(samp, rgba_temp, rgba);
2904    }
2905 }
2906
2907
2908 /**
2909  * Create a sampler variant for a given set of non-orthogonal state.
2910  */
2911 struct sp_sampler_variant *
2912 sp_create_sampler_variant( const struct pipe_sampler_state *sampler,
2913                            const union sp_sampler_key key )
2914 {
2915    struct sp_sampler_variant *samp = CALLOC_STRUCT(sp_sampler_variant);
2916    if (!samp)
2917       return NULL;
2918
2919    samp->sampler = sampler;
2920    samp->key = key;
2921
2922    /* Note that (for instance) linear_texcoord_s and
2923     * nearest_texcoord_s may be active at the same time, if the
2924     * sampler min_img_filter differs from its mag_img_filter.
2925     */
2926    if (sampler->normalized_coords) {
2927       samp->linear_texcoord_s = get_linear_wrap( sampler->wrap_s );
2928       samp->linear_texcoord_t = get_linear_wrap( sampler->wrap_t );
2929       samp->linear_texcoord_p = get_linear_wrap( sampler->wrap_r );
2930
2931       samp->nearest_texcoord_s = get_nearest_wrap( sampler->wrap_s );
2932       samp->nearest_texcoord_t = get_nearest_wrap( sampler->wrap_t );
2933       samp->nearest_texcoord_p = get_nearest_wrap( sampler->wrap_r );
2934    }
2935    else {
2936       samp->linear_texcoord_s = get_linear_unorm_wrap( sampler->wrap_s );
2937       samp->linear_texcoord_t = get_linear_unorm_wrap( sampler->wrap_t );
2938       samp->linear_texcoord_p = get_linear_unorm_wrap( sampler->wrap_r );
2939
2940       samp->nearest_texcoord_s = get_nearest_unorm_wrap( sampler->wrap_s );
2941       samp->nearest_texcoord_t = get_nearest_unorm_wrap( sampler->wrap_t );
2942       samp->nearest_texcoord_p = get_nearest_unorm_wrap( sampler->wrap_r );
2943    }
2944
2945    samp->compute_lambda = get_lambda_func( key );
2946
2947    samp->min_img_filter = get_img_filter(key, sampler->min_img_filter, sampler);
2948    samp->mag_img_filter = get_img_filter(key, sampler->mag_img_filter, sampler);
2949
2950    switch (sampler->min_mip_filter) {
2951    case PIPE_TEX_MIPFILTER_NONE:
2952       if (sampler->min_img_filter == sampler->mag_img_filter)
2953          samp->mip_filter = mip_filter_none_no_filter_select;
2954       else
2955          samp->mip_filter = mip_filter_none;
2956       break;
2957
2958    case PIPE_TEX_MIPFILTER_NEAREST:
2959       samp->mip_filter = mip_filter_nearest;
2960       break;
2961
2962    case PIPE_TEX_MIPFILTER_LINEAR:
2963       if (key.bits.is_pot &&
2964           key.bits.target == PIPE_TEXTURE_2D &&
2965           sampler->min_img_filter == sampler->mag_img_filter &&
2966           sampler->normalized_coords &&
2967           sampler->wrap_s == PIPE_TEX_WRAP_REPEAT &&
2968           sampler->wrap_t == PIPE_TEX_WRAP_REPEAT &&
2969           sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR) {
2970          samp->mip_filter = mip_filter_linear_2d_linear_repeat_POT;
2971       }
2972       else {
2973          samp->mip_filter = mip_filter_linear;
2974       }
2975
2976       /* Anisotropic filtering extension. */
2977       if (sampler->max_anisotropy > 1) {
2978         samp->mip_filter = mip_filter_linear_aniso;
2979
2980         /* Override min_img_filter:
2981          * min_img_filter needs to be set to NEAREST since we need to access
2982          * each texture pixel as it is and weight it later; using linear
2983          * filters will have incorrect results.
2984          * By setting the filter to NEAREST here, we can avoid calling the
2985          * generic img_filter_2d_nearest in the anisotropic filter function,
2986          * making it possible to use one of the accelerated implementations
2987          */
2988         samp->min_img_filter = get_img_filter(key, PIPE_TEX_FILTER_NEAREST, sampler);
2989
2990         /* on first access create the lookup table containing the filter weights. */
2991         if (!weightLut) {
2992            create_filter_table();
2993         }
2994       }
2995
2996       break;
2997    }
2998
2999    if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
3000       samp->compare = sample_compare;
3001    }
3002    else {
3003       /* Skip compare operation by promoting the mip_filter function
3004        * pointer:
3005        */
3006       samp->compare = samp->mip_filter;
3007    }
3008
3009    if (key.bits.target == PIPE_TEXTURE_CUBE || key.bits.target == PIPE_TEXTURE_CUBE_ARRAY) {
3010       samp->sample_target = sample_cube;
3011    }
3012    else {
3013       samp->faces[0] = 0;
3014       samp->faces[1] = 0;
3015       samp->faces[2] = 0;
3016       samp->faces[3] = 0;
3017
3018       /* Skip cube face determination by promoting the compare
3019        * function pointer:
3020        */
3021       samp->sample_target = samp->compare;
3022    }
3023
3024    if (any_swizzle(key)) {
3025       samp->base.get_samples = sample_swizzle;
3026    }
3027    else {
3028       samp->base.get_samples = samp->sample_target;
3029    }
3030
3031    samp->base.get_dims = sample_get_dims;
3032    samp->base.get_texel = sample_get_texels;
3033    return samp;
3034 }