src/gallium/drivers/softpipe/sp_tex_sample.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 VMware, Inc.
   4  * All Rights Reserved.
   5  * Copyright 2008-2010 VMware, Inc.  All rights reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the
   9  * "Software"), to deal in the Software without restriction, including
  10  * without limitation the rights to use, copy, modify, merge, publish,
  11  * distribute, sub license, and/or sell copies of the Software, and to
  12  * permit persons to whom the Software is furnished to do so, subject to
  13  * the following conditions:
  14  *
  15  * The above copyright notice and this permission notice (including the
  16  * next paragraph) shall be included in all copies or substantial portions
  17  * of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26  *
  27  **************************************************************************/
  28
  29 /**
  30  * Texture sampling
  31  *
  32  * Authors:
  33  *   Brian Paul
  34  *   Keith Whitwell
  35  */
  36
  37 #include "pipe/p_context.h"
  38 #include "pipe/p_defines.h"
  39 #include "pipe/p_shader_tokens.h"
  40 #include "util/u_math.h"
  41 #include "util/u_format.h"
  42 #include "util/u_memory.h"
  43 #include "util/u_inlines.h"
  44 #include "sp_quad.h"   /* only for #define QUAD_* tokens */
  45 #include "sp_tex_sample.h"
  46 #include "sp_texture.h"
  47 #include "sp_tex_tile_cache.h"
  48
  49
  50 /** Set to one to help debug texture sampling */
  51 #define DEBUG_TEX 0
  52
  53
  54 /*
  55  * Return fractional part of 'f'.  Used for computing interpolation weights.
  56  * Need to be careful with negative values.
  57  * Note, if this function isn't perfect you'll sometimes see 1-pixel bands
  58  * of improperly weighted linear-filtered textures.
  59  * The tests/texwrap.c demo is a good test.
  60  */
  61 static inline float
  62 frac(float f)
  63 {
  64    return f - floorf(f);
  65 }
  66
  67
  68
  69 /**
  70  * Linear interpolation macro
  71  */
  72 static inline float
  73 lerp(float a, float v0, float v1)
  74 {
  75    return v0 + a * (v1 - v0);
  76 }
  77
  78
  79 /**
  80  * Do 2D/bilinear interpolation of float values.
  81  * v00, v10, v01 and v11 are typically four texture samples in a square/box.
  82  * a and b are the horizontal and vertical interpolants.
  83  * It's important that this function is inlined when compiled with
  84  * optimization!  If we find that's not true on some systems, convert
  85  * to a macro.
  86  */
  87 static inline float
  88 lerp_2d(float a, float b,
  89         float v00, float v10, float v01, float v11)
  90 {
  91    const float temp0 = lerp(a, v00, v10);
  92    const float temp1 = lerp(a, v01, v11);
  93    return lerp(b, temp0, temp1);
  94 }
  95
  96
  97 /**
  98  * As above, but 3D interpolation of 8 values.
  99  */
 100 static inline float
 101 lerp_3d(float a, float b, float c,
 102         float v000, float v100, float v010, float v110,
 103         float v001, float v101, float v011, float v111)
 104 {
 105    const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
 106    const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
 107    return lerp(c, temp0, temp1);
 108 }
 109
 110
 111
 112 /**
 113  * Compute coord % size for repeat wrap modes.
 114  * Note that if coord is negative, coord % size doesn't give the right
 115  * value.  To avoid that problem we add a large multiple of the size
 116  * (rather than using a conditional).
 117  */
 118 static inline int
 119 repeat(int coord, unsigned size)
 120 {
 121    return (coord + size * 1024) % size;
 122 }
 123
 124
 125 /**
 126  * Apply texture coord wrapping mode and return integer texture indexes
 127  * for a vector of four texcoords (S or T or P).
 128  * \param wrapMode  PIPE_TEX_WRAP_x
 129  * \param s  the incoming texcoords
 130  * \param size  the texture image size
 131  * \param icoord  returns the integer texcoords
 132  */
 133 static void
 134 wrap_nearest_repeat(float s, unsigned size, int offset, int *icoord)
 135 {
 136    /* s limited to [0,1) */
 137    /* i limited to [0,size-1] */
 138    const int i = util_ifloor(s * size);
 139    *icoord = repeat(i + offset, size);
 140 }
 141
 142
 143 static void
 144 wrap_nearest_clamp(float s, unsigned size, int offset, int *icoord)
 145 {
 146    /* s limited to [0,1] */
 147    /* i limited to [0,size-1] */
 148    s *= size;
 149    s += offset;
 150    if (s <= 0.0F)
 151       *icoord = 0;
 152    else if (s >= size)
 153       *icoord = size - 1;
 154    else
 155       *icoord = util_ifloor(s);
 156 }
 157
 158
 159 static void
 160 wrap_nearest_clamp_to_edge(float s, unsigned size, int offset, int *icoord)
 161 {
 162    /* s limited to [min,max] */
 163    /* i limited to [0, size-1] */
 164    const float min = 0.5F;
 165    const float max = (float)size - 0.5F;
 166
 167    s *= size;
 168    s += offset;
 169
 170    if (s < min)
 171       *icoord = 0;
 172    else if (s > max)
 173       *icoord = size - 1;
 174    else
 175       *icoord = util_ifloor(s);
 176 }
 177
 178
 179 static void
 180 wrap_nearest_clamp_to_border(float s, unsigned size, int offset, int *icoord)
 181 {
 182    /* s limited to [min,max] */
 183    /* i limited to [-1, size] */
 184    const float min = -0.5F;
 185    const float max = size + 0.5F;
 186
 187    s *= size;
 188    s += offset;
 189    if (s <= min)
 190       *icoord = -1;
 191    else if (s >= max)
 192       *icoord = size;
 193    else
 194       *icoord = util_ifloor(s);
 195 }
 196
 197 static void
 198 wrap_nearest_mirror_repeat(float s, unsigned size, int offset, int *icoord)
 199 {
 200    const float min = 1.0F / (2.0F * size);
 201    const float max = 1.0F - min;
 202    int flr;
 203    float u;
 204
 205    s += (float)offset / size;
 206    flr = util_ifloor(s);
 207    u = frac(s);
 208    if (flr & 1)
 209       u = 1.0F - u;
 210    if (u < min)
 211       *icoord = 0;
 212    else if (u > max)
 213       *icoord = size - 1;
 214    else
 215       *icoord = util_ifloor(u * size);
 216 }
 217
 218
 219 static void
 220 wrap_nearest_mirror_clamp(float s, unsigned size, int offset, int *icoord)
 221 {
 222    /* s limited to [0,1] */
 223    /* i limited to [0,size-1] */
 224    const float u = fabsf(s * size + offset);
 225    if (u <= 0.0F)
 226       *icoord = 0;
 227    else if (u >= size)
 228       *icoord = size - 1;
 229    else
 230       *icoord = util_ifloor(u);
 231 }
 232
 233
 234 static void
 235 wrap_nearest_mirror_clamp_to_edge(float s, unsigned size, int offset, int *icoord)
 236 {
 237    /* s limited to [min,max] */
 238    /* i limited to [0, size-1] */
 239    const float min = 0.5F;
 240    const float max = (float)size - 0.5F;
 241    const float u = fabsf(s * size + offset);
 242
 243    if (u < min)
 244       *icoord = 0;
 245    else if (u > max)
 246       *icoord = size - 1;
 247    else
 248       *icoord = util_ifloor(u);
 249 }
 250
 251
 252 static void
 253 wrap_nearest_mirror_clamp_to_border(float s, unsigned size, int offset, int *icoord)
 254 {
 255    /* u limited to [-0.5, size-0.5] */
 256    const float min = -0.5F;
 257    const float max = (float)size + 0.5F;
 258    const float u = fabsf(s * size + offset);
 259
 260    if (u < min)
 261       *icoord = -1;
 262    else if (u > max)
 263       *icoord = size;
 264    else
 265       *icoord = util_ifloor(u);
 266 }
 267
 268
 269 /**
 270  * Used to compute texel locations for linear sampling
 271  * \param wrapMode  PIPE_TEX_WRAP_x
 272  * \param s  the texcoord
 273  * \param size  the texture image size
 274  * \param icoord0  returns first texture index
 275  * \param icoord1  returns second texture index (usually icoord0 + 1)
 276  * \param w  returns blend factor/weight between texture indices
 277  * \param icoord  returns the computed integer texture coord
 278  */
 279 static void
 280 wrap_linear_repeat(float s, unsigned size, int offset,
 281                    int *icoord0, int *icoord1, float *w)
 282 {
 283    const float u = s * size - 0.5F;
 284    *icoord0 = repeat(util_ifloor(u) + offset, size);
 285    *icoord1 = repeat(*icoord0 + 1, size);
 286    *w = frac(u);
 287 }
 288
 289
 290 static void
 291 wrap_linear_clamp(float s, unsigned size, int offset,
 292                   int *icoord0, int *icoord1, float *w)
 293 {
 294    const float u = CLAMP(s * size + offset, 0.0F, (float)size) - 0.5f;
 295
 296    *icoord0 = util_ifloor(u);
 297    *icoord1 = *icoord0 + 1;
 298    *w = frac(u);
 299 }
 300
 301
 302 static void
 303 wrap_linear_clamp_to_edge(float s, unsigned size, int offset,
 304                           int *icoord0, int *icoord1, float *w)
 305 {
 306    const float u = CLAMP(s * size + offset, 0.0F, (float)size) - 0.5f;
 307    *icoord0 = util_ifloor(u);
 308    *icoord1 = *icoord0 + 1;
 309    if (*icoord0 < 0)
 310       *icoord0 = 0;
 311    if (*icoord1 >= (int) size)
 312       *icoord1 = size - 1;
 313    *w = frac(u);
 314 }
 315
 316
 317 static void
 318 wrap_linear_clamp_to_border(float s, unsigned size, int offset,
 319                             int *icoord0, int *icoord1, float *w)
 320 {
 321    const float min = -0.5F;
 322    const float max = (float)size + 0.5F;
 323    const float u = CLAMP(s * size + offset, min, max) - 0.5f;
 324    *icoord0 = util_ifloor(u);
 325    *icoord1 = *icoord0 + 1;
 326    *w = frac(u);
 327 }
 328
 329
 330 static void
 331 wrap_linear_mirror_repeat(float s, unsigned size, int offset,
 332                           int *icoord0, int *icoord1, float *w)
 333 {
 334    int flr;
 335    float u;
 336
 337    s += (float)offset / size;
 338    flr = util_ifloor(s);
 339    u = frac(s);
 340    if (flr & 1)
 341       u = 1.0F - u;
 342    u = u * size - 0.5F;
 343    *icoord0 = util_ifloor(u);
 344    *icoord1 = *icoord0 + 1;
 345    if (*icoord0 < 0)
 346       *icoord0 = 0;
 347    if (*icoord1 >= (int) size)
 348       *icoord1 = size - 1;
 349    *w = frac(u);
 350 }
 351
 352
 353 static void
 354 wrap_linear_mirror_clamp(float s, unsigned size, int offset,
 355                          int *icoord0, int *icoord1, float *w)
 356 {
 357    float u = fabsf(s * size + offset);
 358    if (u >= size)
 359       u = (float) size;
 360    u -= 0.5F;
 361    *icoord0 = util_ifloor(u);
 362    *icoord1 = *icoord0 + 1;
 363    *w = frac(u);
 364 }
 365
 366
 367 static void
 368 wrap_linear_mirror_clamp_to_edge(float s, unsigned size, int offset,
 369                                  int *icoord0, int *icoord1, float *w)
 370 {
 371    float u = fabsf(s * size + offset);
 372    if (u >= size)
 373       u = (float) size;
 374    u -= 0.5F;
 375    *icoord0 = util_ifloor(u);
 376    *icoord1 = *icoord0 + 1;
 377    if (*icoord0 < 0)
 378       *icoord0 = 0;
 379    if (*icoord1 >= (int) size)
 380       *icoord1 = size - 1;
 381    *w = frac(u);
 382 }
 383
 384
 385 static void
 386 wrap_linear_mirror_clamp_to_border(float s, unsigned size, int offset,
 387                                    int *icoord0, int *icoord1, float *w)
 388 {
 389    const float min = -0.5F;
 390    const float max = size + 0.5F;
 391    const float t = fabsf(s * size + offset);
 392    const float u = CLAMP(t, min, max) - 0.5F;
 393    *icoord0 = util_ifloor(u);
 394    *icoord1 = *icoord0 + 1;
 395    *w = frac(u);
 396 }
 397
 398
 399 /**
 400  * PIPE_TEX_WRAP_CLAMP for nearest sampling, unnormalized coords.
 401  */
 402 static void
 403 wrap_nearest_unorm_clamp(float s, unsigned size, int offset, int *icoord)
 404 {
 405    const int i = util_ifloor(s);
 406    *icoord = CLAMP(i + offset, 0, (int) size-1);
 407 }
 408
 409
 410 /**
 411  * PIPE_TEX_WRAP_CLAMP_TO_BORDER for nearest sampling, unnormalized coords.
 412  */
 413 static void
 414 wrap_nearest_unorm_clamp_to_border(float s, unsigned size, int offset, int *icoord)
 415 {
 416    *icoord = util_ifloor( CLAMP(s + offset, -0.5F, (float) size + 0.5F) );
 417 }
 418
 419
 420 /**
 421  * PIPE_TEX_WRAP_CLAMP_TO_EDGE for nearest sampling, unnormalized coords.
 422  */
 423 static void
 424 wrap_nearest_unorm_clamp_to_edge(float s, unsigned size, int offset, int *icoord)
 425 {
 426    *icoord = util_ifloor( CLAMP(s + offset, 0.5F, (float) size - 0.5F) );
 427 }
 428
 429
 430 /**
 431  * PIPE_TEX_WRAP_CLAMP for linear sampling, unnormalized coords.
 432  */
 433 static void
 434 wrap_linear_unorm_clamp(float s, unsigned size, int offset,
 435                         int *icoord0, int *icoord1, float *w)
 436 {
 437    /* Not exactly what the spec says, but it matches NVIDIA output */
 438    const float u = CLAMP(s + offset - 0.5F, 0.0f, (float) size - 1.0f);
 439    *icoord0 = util_ifloor(u);
 440    *icoord1 = *icoord0 + 1;
 441    *w = frac(u);
 442 }
 443
 444
 445 /**
 446  * PIPE_TEX_WRAP_CLAMP_TO_BORDER for linear sampling, unnormalized coords.
 447  */
 448 static void
 449 wrap_linear_unorm_clamp_to_border(float s, unsigned size, int offset,
 450                                   int *icoord0, int *icoord1, float *w)
 451 {
 452    const float u = CLAMP(s + offset, -0.5F, (float) size + 0.5F) - 0.5F;
 453    *icoord0 = util_ifloor(u);
 454    *icoord1 = *icoord0 + 1;
 455    if (*icoord1 > (int) size - 1)
 456       *icoord1 = size - 1;
 457    *w = frac(u);
 458 }
 459
 460
 461 /**
 462  * PIPE_TEX_WRAP_CLAMP_TO_EDGE for linear sampling, unnormalized coords.
 463  */
 464 static void
 465 wrap_linear_unorm_clamp_to_edge(float s, unsigned size, int offset,
 466                                 int *icoord0, int *icoord1, float *w)
 467 {
 468    const float u = CLAMP(s + offset, +0.5F, (float) size - 0.5F) - 0.5F;
 469    *icoord0 = util_ifloor(u);
 470    *icoord1 = *icoord0 + 1;
 471    if (*icoord1 > (int) size - 1)
 472       *icoord1 = size - 1;
 473    *w = frac(u);
 474 }
 475
 476
 477 /**
 478  * Do coordinate to array index conversion.  For array textures.
 479  */
 480 static inline int
 481 coord_to_layer(float coord, unsigned first_layer, unsigned last_layer)
 482 {
 483    const int c = util_ifloor(coord + 0.5F);
 484    return CLAMP(c, (int)first_layer, (int)last_layer);
 485 }
 486
 487
 488 /**
 489  * Examine the quad's texture coordinates to compute the partial
 490  * derivatives w.r.t X and Y, then compute lambda (level of detail).
 491  */
 492 static float
 493 compute_lambda_1d(const struct sp_sampler_view *sview,
 494                   const float s[TGSI_QUAD_SIZE],
 495                   const float t[TGSI_QUAD_SIZE],
 496                   const float p[TGSI_QUAD_SIZE])
 497 {
 498    const struct pipe_resource *texture = sview->base.texture;
 499    const float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 500    const float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 501    const float rho = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
 502
 503    return util_fast_log2(rho);
 504 }
 505
 506
 507 static float
 508 compute_lambda_2d(const struct sp_sampler_view *sview,
 509                   const float s[TGSI_QUAD_SIZE],
 510                   const float t[TGSI_QUAD_SIZE],
 511                   const float p[TGSI_QUAD_SIZE])
 512 {
 513    const struct pipe_resource *texture = sview->base.texture;
 514    const float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 515    const float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 516    const float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
 517    const float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
 518    const float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
 519    const float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
 520    const float rho  = MAX2(maxx, maxy);
 521
 522    return util_fast_log2(rho);
 523 }
 524
 525
 526 static float
 527 compute_lambda_3d(const struct sp_sampler_view *sview,
 528                   const float s[TGSI_QUAD_SIZE],
 529                   const float t[TGSI_QUAD_SIZE],
 530                   const float p[TGSI_QUAD_SIZE])
 531 {
 532    const struct pipe_resource *texture = sview->base.texture;
 533    const float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 534    const float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 535    const float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
 536    const float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
 537    const float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
 538    const float dpdy = fabsf(p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT]);
 539    const float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
 540    const float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
 541    const float maxz = MAX2(dpdx, dpdy) * u_minify(texture->depth0, sview->base.u.tex.first_level);
 542    const float rho = MAX3(maxx, maxy, maxz);
 543
 544    return util_fast_log2(rho);
 545 }
 546
 547
 548 /**
 549  * Compute lambda for a vertex texture sampler.
 550  * Since there aren't derivatives to use, just return 0.
 551  */
 552 static float
 553 compute_lambda_vert(const struct sp_sampler_view *sview,
 554                     const float s[TGSI_QUAD_SIZE],
 555                     const float t[TGSI_QUAD_SIZE],
 556                     const float p[TGSI_QUAD_SIZE])
 557 {
 558    return 0.0f;
 559 }
 560
 561
 562
 563 /**
 564  * Get a texel from a texture, using the texture tile cache.
 565  *
 566  * \param addr  the template tex address containing cube, z, face info.
 567  * \param x  the x coord of texel within 2D image
 568  * \param y  the y coord of texel within 2D image
 569  * \param rgba  the quad to put the texel/color into
 570  *
 571  * XXX maybe move this into sp_tex_tile_cache.c and merge with the
 572  * sp_get_cached_tile_tex() function.
 573  */
 574
 575
 576
 577
 578 static inline const float *
 579 get_texel_2d_no_border(const struct sp_sampler_view *sp_sview,
 580                        union tex_tile_address addr, int x, int y)
 581 {
 582    const struct softpipe_tex_cached_tile *tile;
 583    addr.bits.x = x / TEX_TILE_SIZE;
 584    addr.bits.y = y / TEX_TILE_SIZE;
 585    y %= TEX_TILE_SIZE;
 586    x %= TEX_TILE_SIZE;
 587
 588    tile = sp_get_cached_tile_tex(sp_sview->cache, addr);
 589
 590    return &tile->data.color[y][x][0];
 591 }
 592
 593
 594 static inline const float *
 595 get_texel_2d(const struct sp_sampler_view *sp_sview,
 596              const struct sp_sampler *sp_samp,
 597              union tex_tile_address addr, int x, int y)
 598 {
 599    const struct pipe_resource *texture = sp_sview->base.texture;
 600    const unsigned level = addr.bits.level;
 601
 602    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 603        y < 0 || y >= (int) u_minify(texture->height0, level)) {
 604       return sp_samp->base.border_color.f;
 605    }
 606    else {
 607       return get_texel_2d_no_border( sp_sview, addr, x, y );
 608    }
 609 }
 610
 611
 612 /*
 613  * Here's the complete logic (HOLY CRAP) for finding next face and doing the
 614  * corresponding coord wrapping, implemented by get_next_face,
 615  * get_next_xcoord, get_next_ycoord.
 616  * Read like that (first line):
 617  * If face is +x and s coord is below zero, then
 618  * new face is +z, new s is max , new t is old t
 619  * (max is always cube size - 1).
 620  *
 621  * +x s- -> +z: s = max,   t = t
 622  * +x s+ -> -z: s = 0,     t = t
 623  * +x t- -> +y: s = max,   t = max-s
 624  * +x t+ -> -y: s = max,   t = s
 625  *
 626  * -x s- -> -z: s = max,   t = t
 627  * -x s+ -> +z: s = 0,     t = t
 628  * -x t- -> +y: s = 0,     t = s
 629  * -x t+ -> -y: s = 0,     t = max-s
 630  *
 631  * +y s- -> -x: s = t,     t = 0
 632  * +y s+ -> +x: s = max-t, t = 0
 633  * +y t- -> -z: s = max-s, t = 0
 634  * +y t+ -> +z: s = s,     t = 0
 635  *
 636  * -y s- -> -x: s = max-t, t = max
 637  * -y s+ -> +x: s = t,     t = max
 638  * -y t- -> +z: s = s,     t = max
 639  * -y t+ -> -z: s = max-s, t = max
 640
 641  * +z s- -> -x: s = max,   t = t
 642  * +z s+ -> +x: s = 0,     t = t
 643  * +z t- -> +y: s = s,     t = max
 644  * +z t+ -> -y: s = s,     t = 0
 645
 646  * -z s- -> +x: s = max,   t = t
 647  * -z s+ -> -x: s = 0,     t = t
 648  * -z t- -> +y: s = max-s, t = 0
 649  * -z t+ -> -y: s = max-s, t = max
 650  */
 651
 652
/*
 * Seamless cubemap neighbour array.
 * face_array[face][dir] is the face adjacent to 'face' in direction 'dir',
 * where dir is 0 = left (-x), 1 = right (+x), 2 = up (-y), 3 = down (+y).
 * The array is indexed directly by face number, so the rows below must stay
 * in the order given by their labels.
 */
static const unsigned face_array[PIPE_TEX_FACE_MAX][4] = {
   /* +X then -X: the Z neighbours are swapped, the Y neighbours are the same */
   /* PIPE_TEX_FACE_POS_X,*/
   { PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z,
     PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },
   /* PIPE_TEX_FACE_NEG_X */
   { PIPE_TEX_FACE_NEG_Z, PIPE_TEX_FACE_POS_Z,
     PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },

   /* +Y then -Y: the Z neighbours are swapped, the X neighbours are the same */
   /* PIPE_TEX_FACE_POS_Y */
   { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
     PIPE_TEX_FACE_NEG_Z, PIPE_TEX_FACE_POS_Z },

   /* PIPE_TEX_FACE_NEG_Y */
   { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
     PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z },

   /* +Z then -Z: the X neighbours are swapped, the Y neighbours are the same */
   /* PIPE_TEX_FACE_POS_Z */
   { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
     PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },

   /* PIPE_TEX_FACE_NEG_Z */
   { PIPE_TEX_FACE_POS_X, PIPE_TEX_FACE_NEG_X,
     PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y }
};
 685
 686 static inline unsigned
 687 get_next_face(unsigned face, int idx)
 688 {
 689    return face_array[face][idx];
 690 }
 691
 692 /*
 693  * return a new xcoord based on old face, old coords, cube size
 694  * and fall_off_index (0 for x-, 1 for x+, 2 for y-, 3 for y+)
 695  */
 696 static inline int
 697 get_next_xcoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
 698 {
 699    if ((face == 0 && fall_off_index != 1) ||
 700        (face == 1 && fall_off_index == 0) ||
 701        (face == 4 && fall_off_index == 0) ||
 702        (face == 5 && fall_off_index == 0)) {
 703       return max;
 704    }
 705    if ((face == 1 && fall_off_index != 0) ||
 706        (face == 0 && fall_off_index == 1) ||
 707        (face == 4 && fall_off_index == 1) ||
 708        (face == 5 && fall_off_index == 1)) {
 709       return 0;
 710    }
 711    if ((face == 4 && fall_off_index >= 2) ||
 712        (face == 2 && fall_off_index == 3) ||
 713        (face == 3 && fall_off_index == 2)) {
 714       return xc;
 715    }
 716    if ((face == 5 && fall_off_index >= 2) ||
 717        (face == 2 && fall_off_index == 2) ||
 718        (face == 3 && fall_off_index == 3)) {
 719       return max - xc;
 720    }
 721    if ((face == 2 && fall_off_index == 0) ||
 722        (face == 3 && fall_off_index == 1)) {
 723       return yc;
 724    }
 725    /* (face == 2 && fall_off_index == 1) ||
 726       (face == 3 && fall_off_index == 0)) */
 727    return max - yc;
 728 }
 729
 730 /*
 731  * return a new ycoord based on old face, old coords, cube size
 732  * and fall_off_index (0 for x-, 1 for x+, 2 for y-, 3 for y+)
 733  */
 734 static inline int
 735 get_next_ycoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
 736 {
 737    if ((fall_off_index <= 1) && (face <= 1 || face >= 4)) {
 738       return yc;
 739    }
 740    if (face == 2 ||
 741        (face == 4 && fall_off_index == 3) ||
 742        (face == 5 && fall_off_index == 2)) {
 743       return 0;
 744    }
 745    if (face == 3 ||
 746        (face == 4 && fall_off_index == 2) ||
 747        (face == 5 && fall_off_index == 3)) {
 748       return max;
 749    }
 750    if ((face == 0 && fall_off_index == 3) ||
 751        (face == 1 && fall_off_index == 2)) {
 752       return xc;
 753    }
 754    /* (face == 0 && fall_off_index == 2) ||
 755       (face == 1 && fall_off_index == 3) */
 756    return max - xc;
 757 }
 758
 759
 760 /* Gather a quad of adjacent texels within a tile:
 761  */
 762 static inline void
 763 get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_view *sp_sview,
 764                                         union tex_tile_address addr,
 765                                         unsigned x, unsigned y,
 766                                         const float *out[4])
 767 {
 768     const struct softpipe_tex_cached_tile *tile;
 769
 770    addr.bits.x = x / TEX_TILE_SIZE;
 771    addr.bits.y = y / TEX_TILE_SIZE;
 772    y %= TEX_TILE_SIZE;
 773    x %= TEX_TILE_SIZE;
 774
 775    tile = sp_get_cached_tile_tex(sp_sview->cache, addr);
 776
 777    out[0] = &tile->data.color[y  ][x  ][0];
 778    out[1] = &tile->data.color[y  ][x+1][0];
 779    out[2] = &tile->data.color[y+1][x  ][0];
 780    out[3] = &tile->data.color[y+1][x+1][0];
 781 }
 782
 783
 784 /* Gather a quad of potentially non-adjacent texels:
 785  */
 786 static inline void
 787 get_texel_quad_2d_no_border(const struct sp_sampler_view *sp_sview,
 788                             union tex_tile_address addr,
 789                             int x0, int y0,
 790                             int x1, int y1,
 791                             const float *out[4])
 792 {
 793    out[0] = get_texel_2d_no_border( sp_sview, addr, x0, y0 );
 794    out[1] = get_texel_2d_no_border( sp_sview, addr, x1, y0 );
 795    out[2] = get_texel_2d_no_border( sp_sview, addr, x0, y1 );
 796    out[3] = get_texel_2d_no_border( sp_sview, addr, x1, y1 );
 797 }
 798
 799 /* Can involve a lot of unnecessary checks for border color:
 800  */
 801 static inline void
 802 get_texel_quad_2d(const struct sp_sampler_view *sp_sview,
 803                   const struct sp_sampler *sp_samp,
 804                   union tex_tile_address addr,
 805                   int x0, int y0,
 806                   int x1, int y1,
 807                   const float *out[4])
 808 {
 809    out[0] = get_texel_2d( sp_sview, sp_samp, addr, x0, y0 );
 810    out[1] = get_texel_2d( sp_sview, sp_samp, addr, x1, y0 );
 811    out[3] = get_texel_2d( sp_sview, sp_samp, addr, x1, y1 );
 812    out[2] = get_texel_2d( sp_sview, sp_samp, addr, x0, y1 );
 813 }
 814
 815
 816
 817 /* 3d variants:
 818  */
 819 static inline const float *
 820 get_texel_3d_no_border(const struct sp_sampler_view *sp_sview,
 821                        union tex_tile_address addr, int x, int y, int z)
 822 {
 823    const struct softpipe_tex_cached_tile *tile;
 824
 825    addr.bits.x = x / TEX_TILE_SIZE;
 826    addr.bits.y = y / TEX_TILE_SIZE;
 827    addr.bits.z = z;
 828    y %= TEX_TILE_SIZE;
 829    x %= TEX_TILE_SIZE;
 830
 831    tile = sp_get_cached_tile_tex(sp_sview->cache, addr);
 832
 833    return &tile->data.color[y][x][0];
 834 }
 835
 836
 837 static inline const float *
 838 get_texel_3d(const struct sp_sampler_view *sp_sview,
 839              const struct sp_sampler *sp_samp,
 840              union tex_tile_address addr, int x, int y, int z)
 841 {
 842    const struct pipe_resource *texture = sp_sview->base.texture;
 843    const unsigned level = addr.bits.level;
 844
 845    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 846        y < 0 || y >= (int) u_minify(texture->height0, level) ||
 847        z < 0 || z >= (int) u_minify(texture->depth0, level)) {
 848       return sp_samp->base.border_color.f;
 849    }
 850    else {
 851       return get_texel_3d_no_border( sp_sview, addr, x, y, z );
 852    }
 853 }
 854
 855
 856 /* Get texel pointer for 1D array texture */
 857 static inline const float *
 858 get_texel_1d_array(const struct sp_sampler_view *sp_sview,
 859                    const struct sp_sampler *sp_samp,
 860                    union tex_tile_address addr, int x, int y)
 861 {
 862    const struct pipe_resource *texture = sp_sview->base.texture;
 863    const unsigned level = addr.bits.level;
 864
 865    if (x < 0 || x >= (int) u_minify(texture->width0, level)) {
 866       return sp_samp->base.border_color.f;
 867    }
 868    else {
 869       return get_texel_2d_no_border(sp_sview, addr, x, y);
 870    }
 871 }
 872
 873
 874 /* Get texel pointer for 2D array texture */
 875 static inline const float *
 876 get_texel_2d_array(const struct sp_sampler_view *sp_sview,
 877                    const struct sp_sampler *sp_samp,
 878                    union tex_tile_address addr, int x, int y, int layer)
 879 {
 880    const struct pipe_resource *texture = sp_sview->base.texture;
 881    const unsigned level = addr.bits.level;
 882
 883    assert(layer < (int) texture->array_size);
 884    assert(layer >= 0);
 885
 886    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 887        y < 0 || y >= (int) u_minify(texture->height0, level)) {
 888       return sp_samp->base.border_color.f;
 889    }
 890    else {
 891       return get_texel_3d_no_border(sp_sview, addr, x, y, layer);
 892    }
 893 }
 894
 895
 896 static inline const float *
 897 get_texel_cube_seamless(const struct sp_sampler_view *sp_sview,
 898                         union tex_tile_address addr, int x, int y,
 899                         float *corner, int layer, unsigned face)
 900 {
 901    const struct pipe_resource *texture = sp_sview->base.texture;
 902    const unsigned level = addr.bits.level;
 903    int new_x, new_y, max_x;
 904
 905    max_x = (int) u_minify(texture->width0, level);
 906
 907    assert(texture->width0 == texture->height0);
 908    new_x = x;
 909    new_y = y;
 910
 911    /* change the face */
 912    if (x < 0) {
 913       /*
 914        * Cheat with corners. They are difficult and I believe because we don't get
 915        * per-pixel faces we can actually have multiple corner texels per pixel,
 916        * which screws things up majorly in any case (as the per spec behavior is
 917        * to average the 3 remaining texels, which we might not have).
 918        * Hence just make sure that the 2nd coord is clamped, will simply pick the
 919        * sample which would have fallen off the x coord, but not y coord.
 920        * So the filter weight of the samples will be wrong, but at least this
 921        * ensures that only valid texels near the corner are used.
 922        */
 923       if (y < 0 || y >= max_x) {
 924          y = CLAMP(y, 0, max_x - 1);
 925       }
 926       new_x = get_next_xcoord(face, 0, max_x -1, x, y);
 927       new_y = get_next_ycoord(face, 0, max_x -1, x, y);
 928       face = get_next_face(face, 0);
 929    } else if (x >= max_x) {
 930       if (y < 0 || y >= max_x) {
 931          y = CLAMP(y, 0, max_x - 1);
 932       }
 933       new_x = get_next_xcoord(face, 1, max_x -1, x, y);
 934       new_y = get_next_ycoord(face, 1, max_x -1, x, y);
 935       face = get_next_face(face, 1);
 936    } else if (y < 0) {
 937       new_x = get_next_xcoord(face, 2, max_x -1, x, y);
 938       new_y = get_next_ycoord(face, 2, max_x -1, x, y);
 939       face = get_next_face(face, 2);
 940    } else if (y >= max_x) {
 941       new_x = get_next_xcoord(face, 3, max_x -1, x, y);
 942       new_y = get_next_ycoord(face, 3, max_x -1, x, y);
 943       face = get_next_face(face, 3);
 944    }
 945
 946    return get_texel_3d_no_border(sp_sview, addr, new_x, new_y, layer + face);
 947 }
 948
 949
 950 /* Get texel pointer for cube array texture */
 951 static inline const float *
 952 get_texel_cube_array(const struct sp_sampler_view *sp_sview,
 953                      const struct sp_sampler *sp_samp,
 954                      union tex_tile_address addr, int x, int y, int layer)
 955 {
 956    const struct pipe_resource *texture = sp_sview->base.texture;
 957    const unsigned level = addr.bits.level;
 958
 959    assert(layer < (int) texture->array_size);
 960    assert(layer >= 0);
 961
 962    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 963        y < 0 || y >= (int) u_minify(texture->height0, level)) {
 964       return sp_samp->base.border_color.f;
 965    }
 966    else {
 967       return get_texel_3d_no_border(sp_sview, addr, x, y, layer);
 968    }
 969 }
 970 /**
 971  * Given the logbase2 of a mipmap's base level size and a mipmap level,
 972  * return the size (in texels) of that mipmap level.
 973  * For example, if level[0].width = 256 then base_pot will be 8.
 974  * If level = 2, then we'll return 64 (the width at level=2).
 975  * Return 1 if level > base_pot.
 976  */
 977 static inline unsigned
 978 pot_level_size(unsigned base_pot, unsigned level)
 979 {
 980    return (base_pot >= level) ? (1 << (base_pot - level)) : 1;
 981 }
 982
 983
 984 static void
 985 print_sample(const char *function, const float *rgba)
 986 {
 987    debug_printf("%s %g %g %g %g\n",
 988                 function,
 989                 rgba[0], rgba[TGSI_NUM_CHANNELS], rgba[2*TGSI_NUM_CHANNELS], rgba[3*TGSI_NUM_CHANNELS]);
 990 }
 991
 992
 993 static void
 994 print_sample_4(const char *function, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 995 {
 996    debug_printf("%s %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n",
 997                 function,
 998                 rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0],
 999                 rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1],
1000                 rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2],
1001                 rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]);
1002 }
1003
1004
1005 /* Some image-filter fastpaths:
1006  */
1007 static inline void
1008 img_filter_2d_linear_repeat_POT(const struct sp_sampler_view *sp_sview,
1009                                 const struct sp_sampler *sp_samp,
1010                                 const struct img_filter_args *args,
1011                                 float *rgba)
1012 {
1013    const unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
1014    const unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
1015    const int xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
1016    const int ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
1017    union tex_tile_address addr;
1018    int c;
1019
1020    const float u = (args->s * xpot - 0.5F) + args->offset[0];
1021    const float v = (args->t * ypot - 0.5F) + args->offset[1];
1022
1023    const int uflr = util_ifloor(u);
1024    const int vflr = util_ifloor(v);
1025
1026    const float xw = u - (float)uflr;
1027    const float yw = v - (float)vflr;
1028
1029    const int x0 = uflr & (xpot - 1);
1030    const int y0 = vflr & (ypot - 1);
1031
1032    const float *tx[4];
1033
1034    addr.value = 0;
1035    addr.bits.level = args->level;
1036
1037    /* Can we fetch all four at once:
1038     */
1039    if (x0 < xmax && y0 < ymax) {
1040       get_texel_quad_2d_no_border_single_tile(sp_sview, addr, x0, y0, tx);
1041    }
1042    else {
1043       const unsigned x1 = (x0 + 1) & (xpot - 1);
1044       const unsigned y1 = (y0 + 1) & (ypot - 1);
1045       get_texel_quad_2d_no_border(sp_sview, addr, x0, y0, x1, y1, tx);
1046    }
1047
1048    /* interpolate R, G, B, A */
1049    for (c = 0; c < TGSI_QUAD_SIZE; c++) {
1050       rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1051                                        tx[0][c], tx[1][c],
1052                                        tx[2][c], tx[3][c]);
1053    }
1054
1055    if (DEBUG_TEX) {
1056       print_sample(__FUNCTION__, rgba);
1057    }
1058 }
1059
1060
1061 static inline void
1062 img_filter_2d_nearest_repeat_POT(const struct sp_sampler_view *sp_sview,
1063                                  const struct sp_sampler *sp_samp,
1064                                  const struct img_filter_args *args,
1065                                  float rgba[TGSI_QUAD_SIZE])
1066 {
1067    const unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
1068    const unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
1069    const float *out;
1070    union tex_tile_address addr;
1071    int c;
1072
1073    const float u = args->s * xpot + args->offset[0];
1074    const float v = args->t * ypot + args->offset[1];
1075
1076    const int uflr = util_ifloor(u);
1077    const int vflr = util_ifloor(v);
1078
1079    const int x0 = uflr & (xpot - 1);
1080    const int y0 = vflr & (ypot - 1);
1081
1082    addr.value = 0;
1083    addr.bits.level = args->level;
1084
1085    out = get_texel_2d_no_border(sp_sview, addr, x0, y0);
1086    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1087       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1088
1089    if (DEBUG_TEX) {
1090       print_sample(__FUNCTION__, rgba);
1091    }
1092 }
1093
1094
1095 static inline void
1096 img_filter_2d_nearest_clamp_POT(const struct sp_sampler_view *sp_sview,
1097                                 const struct sp_sampler *sp_samp,
1098                                 const struct img_filter_args *args,
1099                                 float rgba[TGSI_QUAD_SIZE])
1100 {
1101    const unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
1102    const unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
1103    union tex_tile_address addr;
1104    int c;
1105
1106    const float u = args->s * xpot + args->offset[0];
1107    const float v = args->t * ypot + args->offset[1];
1108
1109    int x0, y0;
1110    const float *out;
1111
1112    addr.value = 0;
1113    addr.bits.level = args->level;
1114
1115    x0 = util_ifloor(u);
1116    if (x0 < 0)
1117       x0 = 0;
1118    else if (x0 > (int) xpot - 1)
1119       x0 = xpot - 1;
1120
1121    y0 = util_ifloor(v);
1122    if (y0 < 0)
1123       y0 = 0;
1124    else if (y0 > (int) ypot - 1)
1125       y0 = ypot - 1;
1126
1127    out = get_texel_2d_no_border(sp_sview, addr, x0, y0);
1128    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1129       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1130
1131    if (DEBUG_TEX) {
1132       print_sample(__FUNCTION__, rgba);
1133    }
1134 }
1135
1136
1137 static void
1138 img_filter_1d_nearest(const struct sp_sampler_view *sp_sview,
1139                       const struct sp_sampler *sp_samp,
1140                       const struct img_filter_args *args,
1141                       float rgba[TGSI_QUAD_SIZE])
1142 {
1143    const struct pipe_resource *texture = sp_sview->base.texture;
1144    const int width = u_minify(texture->width0, args->level);
1145    int x;
1146    union tex_tile_address addr;
1147    const float *out;
1148    int c;
1149
1150    assert(width > 0);
1151
1152    addr.value = 0;
1153    addr.bits.level = args->level;
1154
1155    sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1156
1157    out = get_texel_2d(sp_sview, sp_samp, addr, x, 0);
1158    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1159       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1160
1161    if (DEBUG_TEX) {
1162       print_sample(__FUNCTION__, rgba);
1163    }
1164 }
1165
1166
1167 static void
1168 img_filter_1d_array_nearest(const struct sp_sampler_view *sp_sview,
1169                             const struct sp_sampler *sp_samp,
1170                             const struct img_filter_args *args,
1171                             float *rgba)
1172 {
1173    const struct pipe_resource *texture = sp_sview->base.texture;
1174    const int width = u_minify(texture->width0, args->level);
1175    const int layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
1176                                     sp_sview->base.u.tex.last_layer);
1177    int x;
1178    union tex_tile_address addr;
1179    const float *out;
1180    int c;
1181
1182    assert(width > 0);
1183
1184    addr.value = 0;
1185    addr.bits.level = args->level;
1186
1187    sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1188
1189    out = get_texel_1d_array(sp_sview, sp_samp, addr, x, layer);
1190    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1191       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1192
1193    if (DEBUG_TEX) {
1194       print_sample(__FUNCTION__, rgba);
1195    }
1196 }
1197
1198
1199 static void
1200 img_filter_2d_nearest(const struct sp_sampler_view *sp_sview,
1201                       const struct sp_sampler *sp_samp,
1202                       const struct img_filter_args *args,
1203                       float *rgba)
1204 {
1205    const struct pipe_resource *texture = sp_sview->base.texture;
1206    const int width = u_minify(texture->width0, args->level);
1207    const int height = u_minify(texture->height0, args->level);
1208    int x, y;
1209    union tex_tile_address addr;
1210    const float *out;
1211    int c;
1212
1213    assert(width > 0);
1214    assert(height > 0);
1215
1216    addr.value = 0;
1217    addr.bits.level = args->level;
1218
1219    sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1220    sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
1221
1222    out = get_texel_2d(sp_sview, sp_samp, addr, x, y);
1223    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1224       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1225
1226    if (DEBUG_TEX) {
1227       print_sample(__FUNCTION__, rgba);
1228    }
1229 }
1230
1231
1232 static void
1233 img_filter_2d_array_nearest(const struct sp_sampler_view *sp_sview,
1234                             const struct sp_sampler *sp_samp,
1235                             const struct img_filter_args *args,
1236                             float *rgba)
1237 {
1238    const struct pipe_resource *texture = sp_sview->base.texture;
1239    const int width = u_minify(texture->width0, args->level);
1240    const int height = u_minify(texture->height0, args->level);
1241    const int layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
1242                                     sp_sview->base.u.tex.last_layer);
1243    int x, y;
1244    union tex_tile_address addr;
1245    const float *out;
1246    int c;
1247
1248    assert(width > 0);
1249    assert(height > 0);
1250
1251    addr.value = 0;
1252    addr.bits.level = args->level;
1253
1254    sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1255    sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
1256
1257    out = get_texel_2d_array(sp_sview, sp_samp, addr, x, y, layer);
1258    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1259       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1260
1261    if (DEBUG_TEX) {
1262       print_sample(__FUNCTION__, rgba);
1263    }
1264 }
1265
1266
1267 static void
1268 img_filter_cube_nearest(const struct sp_sampler_view *sp_sview,
1269                         const struct sp_sampler *sp_samp,
1270                         const struct img_filter_args *args,
1271                         float *rgba)
1272 {
1273    const struct pipe_resource *texture = sp_sview->base.texture;
1274    const int width = u_minify(texture->width0, args->level);
1275    const int height = u_minify(texture->height0, args->level);
1276    const int layerface = args->face_id + sp_sview->base.u.tex.first_layer;
1277    int x, y;
1278    union tex_tile_address addr;
1279    const float *out;
1280    int c;
1281
1282    assert(width > 0);
1283    assert(height > 0);
1284
1285    addr.value = 0;
1286    addr.bits.level = args->level;
1287
1288    /*
1289     * If NEAREST filtering is done within a miplevel, always apply wrap
1290     * mode CLAMP_TO_EDGE.
1291     */
1292    if (sp_samp->base.seamless_cube_map) {
1293       wrap_nearest_clamp_to_edge(args->s, width, args->offset[0], &x);
1294       wrap_nearest_clamp_to_edge(args->t, height, args->offset[1], &y);
1295    } else {
1296       /* Would probably make sense to ignore mode and just do edge clamp */
1297       sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1298       sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
1299    }
1300
1301    out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface);
1302    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1303       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1304
1305    if (DEBUG_TEX) {
1306       print_sample(__FUNCTION__, rgba);
1307    }
1308 }
1309
1310 static void
1311 img_filter_cube_array_nearest(const struct sp_sampler_view *sp_sview,
1312                               const struct sp_sampler *sp_samp,
1313                               const struct img_filter_args *args,
1314                               float *rgba)
1315 {
1316    const struct pipe_resource *texture = sp_sview->base.texture;
1317    const int width = u_minify(texture->width0, args->level);
1318    const int height = u_minify(texture->height0, args->level);
1319    const int layerface =
1320       coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
1321                      sp_sview->base.u.tex.first_layer,
1322                      sp_sview->base.u.tex.last_layer - 5) + args->face_id;
1323    int x, y;
1324    union tex_tile_address addr;
1325    const float *out;
1326    int c;
1327
1328    assert(width > 0);
1329    assert(height > 0);
1330
1331    addr.value = 0;
1332    addr.bits.level = args->level;
1333
1334    sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1335    sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
1336
1337    out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface);
1338    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1339       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1340
1341    if (DEBUG_TEX) {
1342       print_sample(__FUNCTION__, rgba);
1343    }
1344 }
1345
1346 static void
1347 img_filter_3d_nearest(const struct sp_sampler_view *sp_sview,
1348                       const struct sp_sampler *sp_samp,
1349                       const struct img_filter_args *args,
1350                       float *rgba)
1351 {
1352    const struct pipe_resource *texture = sp_sview->base.texture;
1353    const int width = u_minify(texture->width0, args->level);
1354    const int height = u_minify(texture->height0, args->level);
1355    const int depth = u_minify(texture->depth0, args->level);
1356    int x, y, z;
1357    union tex_tile_address addr;
1358    const float *out;
1359    int c;
1360
1361    assert(width > 0);
1362    assert(height > 0);
1363    assert(depth > 0);
1364
1365    sp_samp->nearest_texcoord_s(args->s, width,  args->offset[0], &x);
1366    sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
1367    sp_samp->nearest_texcoord_p(args->p, depth,  args->offset[2], &z);
1368
1369    addr.value = 0;
1370    addr.bits.level = args->level;
1371
1372    out = get_texel_3d(sp_sview, sp_samp, addr, x, y, z);
1373    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1374       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1375 }
1376
1377
1378 static void
1379 img_filter_1d_linear(const struct sp_sampler_view *sp_sview,
1380                      const struct sp_sampler *sp_samp,
1381                      const struct img_filter_args *args,
1382                      float *rgba)
1383 {
1384    const struct pipe_resource *texture = sp_sview->base.texture;
1385    const int width = u_minify(texture->width0, args->level);
1386    int x0, x1;
1387    float xw; /* weights */
1388    union tex_tile_address addr;
1389    const float *tx0, *tx1;
1390    int c;
1391
1392    assert(width > 0);
1393
1394    addr.value = 0;
1395    addr.bits.level = args->level;
1396
1397    sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
1398
1399    tx0 = get_texel_2d(sp_sview, sp_samp, addr, x0, 0);
1400    tx1 = get_texel_2d(sp_sview, sp_samp, addr, x1, 0);
1401
1402    /* interpolate R, G, B, A */
1403    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1404       rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
1405 }
1406
1407
1408 static void
1409 img_filter_1d_array_linear(const struct sp_sampler_view *sp_sview,
1410                            const struct sp_sampler *sp_samp,
1411                            const struct img_filter_args *args,
1412                            float *rgba)
1413 {
1414    const struct pipe_resource *texture = sp_sview->base.texture;
1415    const int width = u_minify(texture->width0, args->level);
1416    const int layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
1417                                     sp_sview->base.u.tex.last_layer);
1418    int x0, x1;
1419    float xw; /* weights */
1420    union tex_tile_address addr;
1421    const float *tx0, *tx1;
1422    int c;
1423
1424    assert(width > 0);
1425
1426    addr.value = 0;
1427    addr.bits.level = args->level;
1428
1429    sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
1430
1431    tx0 = get_texel_1d_array(sp_sview, sp_samp, addr, x0, layer);
1432    tx1 = get_texel_1d_array(sp_sview, sp_samp, addr, x1, layer);
1433
1434    /* interpolate R, G, B, A */
1435    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1436       rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
1437 }
1438
1439 /*
1440  * Retrieve the gathered value, need to convert to the
1441  * TGSI expected interface, and take component select
1442  * and swizzling into account.
1443  */
1444 static float
1445 get_gather_value(const struct sp_sampler_view *sp_sview,
1446                  int chan_in, int comp_sel,
1447                  const float *tx[4])
1448 {
1449    int chan;
1450    unsigned swizzle;
1451
1452    /*
1453     * softpipe samples in a different order
1454     * to TGSI expects, so we need to swizzle,
1455     * the samples into the correct slots.
1456     */
1457    switch (chan_in) {
1458    case 0:
1459       chan = 2;
1460       break;
1461    case 1:
1462       chan = 3;
1463       break;
1464    case 2:
1465       chan = 1;
1466       break;
1467    case 3:
1468       chan = 0;
1469       break;
1470    default:
1471       assert(0);
1472       return 0.0;
1473    }
1474
1475    /* pick which component to use for the swizzle */
1476    switch (comp_sel) {
1477    case 0:
1478       swizzle = sp_sview->base.swizzle_r;
1479       break;
1480    case 1:
1481       swizzle = sp_sview->base.swizzle_g;
1482       break;
1483    case 2:
1484       swizzle = sp_sview->base.swizzle_b;
1485       break;
1486    case 3:
1487       swizzle = sp_sview->base.swizzle_a;
1488       break;
1489    default:
1490       assert(0);
1491       return 0.0;
1492    }
1493
1494    /* get correct result using the channel and swizzle */
1495    switch (swizzle) {
1496    case PIPE_SWIZZLE_ZERO:
1497       return 0.0;
1498    case PIPE_SWIZZLE_ONE:
1499       return 1.0;
1500    default:
1501       return tx[chan][swizzle];
1502    }
1503 }
1504
1505
1506 static void
1507 img_filter_2d_linear(const struct sp_sampler_view *sp_sview,
1508                      const struct sp_sampler *sp_samp,
1509                      const struct img_filter_args *args,
1510                      float *rgba)
1511 {
1512    const struct pipe_resource *texture = sp_sview->base.texture;
1513    const int width = u_minify(texture->width0, args->level);
1514    const int height = u_minify(texture->height0, args->level);
1515    int x0, y0, x1, y1;
1516    float xw, yw; /* weights */
1517    union tex_tile_address addr;
1518    const float *tx[4];
1519    int c;
1520
1521    assert(width > 0);
1522    assert(height > 0);
1523
1524    addr.value = 0;
1525    addr.bits.level = args->level;
1526
1527    sp_samp->linear_texcoord_s(args->s, width,  args->offset[0], &x0, &x1, &xw);
1528    sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
1529
1530    tx[0] = get_texel_2d(sp_sview, sp_samp, addr, x0, y0);
1531    tx[1] = get_texel_2d(sp_sview, sp_samp, addr, x1, y0);
1532    tx[2] = get_texel_2d(sp_sview, sp_samp, addr, x0, y1);
1533    tx[3] = get_texel_2d(sp_sview, sp_samp, addr, x1, y1);
1534
1535    if (args->gather_only) {
1536       for (c = 0; c < TGSI_QUAD_SIZE; c++)
1537          rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
1538                                                       args->gather_comp,
1539                                                       tx);
1540    } else {
1541       /* interpolate R, G, B, A */
1542       for (c = 0; c < TGSI_QUAD_SIZE; c++)
1543          rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1544                                              tx[0][c], tx[1][c],
1545                                              tx[2][c], tx[3][c]);
1546    }
1547 }
1548
1549
1550 static void
1551 img_filter_2d_array_linear(const struct sp_sampler_view *sp_sview,
1552                            const struct sp_sampler *sp_samp,
1553                            const struct img_filter_args *args,
1554                            float *rgba)
1555 {
1556    const struct pipe_resource *texture = sp_sview->base.texture;
1557    const int width = u_minify(texture->width0, args->level);
1558    const int height = u_minify(texture->height0, args->level);
1559    const int layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
1560                                     sp_sview->base.u.tex.last_layer);
1561    int x0, y0, x1, y1;
1562    float xw, yw; /* weights */
1563    union tex_tile_address addr;
1564    const float *tx[4];
1565    int c;
1566
1567    assert(width > 0);
1568    assert(height > 0);
1569
1570    addr.value = 0;
1571    addr.bits.level = args->level;
1572
1573    sp_samp->linear_texcoord_s(args->s, width,  args->offset[0], &x0, &x1, &xw);
1574    sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
1575
1576    tx[0] = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y0, layer);
1577    tx[1] = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y0, layer);
1578    tx[2] = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y1, layer);
1579    tx[3] = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y1, layer);
1580
1581    if (args->gather_only) {
1582       for (c = 0; c < TGSI_QUAD_SIZE; c++)
1583          rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
1584                                                       args->gather_comp,
1585                                                       tx);
1586    } else {
1587       /* interpolate R, G, B, A */
1588       for (c = 0; c < TGSI_QUAD_SIZE; c++)
1589          rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1590                                              tx[0][c], tx[1][c],
1591                                              tx[2][c], tx[3][c]);
1592    }
1593 }
1594
1595
1596 static void
1597 img_filter_cube_linear(const struct sp_sampler_view *sp_sview,
1598                        const struct sp_sampler *sp_samp,
1599                        const struct img_filter_args *args,
1600                        float *rgba)
1601 {
1602    const struct pipe_resource *texture = sp_sview->base.texture;
1603    const int width = u_minify(texture->width0, args->level);
1604    const int height = u_minify(texture->height0, args->level);
1605    const int layer = sp_sview->base.u.tex.first_layer;
1606    int x0, y0, x1, y1;
1607    float xw, yw; /* weights */
1608    union tex_tile_address addr;
1609    const float *tx[4];
1610    float corner0[TGSI_QUAD_SIZE], corner1[TGSI_QUAD_SIZE],
1611          corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
1612    int c;
1613
1614    assert(width > 0);
1615    assert(height > 0);
1616
1617    addr.value = 0;
1618    addr.bits.level = args->level;
1619
1620    /*
1621     * For seamless if LINEAR filtering is done within a miplevel,
1622     * always apply wrap mode CLAMP_TO_BORDER.
1623     */
1624    if (sp_samp->base.seamless_cube_map) {
1625       /* Note this is a bit overkill, actual clamping is not required */
1626       wrap_linear_clamp_to_border(args->s, width, args->offset[0], &x0, &x1, &xw);
1627       wrap_linear_clamp_to_border(args->t, height, args->offset[1], &y0, &y1, &yw);
1628    } else {
1629       /* Would probably make sense to ignore mode and just do edge clamp */
1630       sp_samp->linear_texcoord_s(args->s, width,  args->offset[0], &x0, &x1, &xw);
1631       sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
1632    }
1633
1634    if (sp_samp->base.seamless_cube_map) {
1635       tx[0] = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id);
1636       tx[1] = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id);
1637       tx[2] = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, args->face_id);
1638       tx[3] = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, args->face_id);
1639    } else {
1640       tx[0] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + args->face_id);
1641       tx[1] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + args->face_id);
1642       tx[2] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + args->face_id);
1643       tx[3] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id);
1644    }
1645
1646    if (args->gather_only) {
1647       for (c = 0; c < TGSI_QUAD_SIZE; c++)
1648          rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
1649                                                       args->gather_comp,
1650                                                       tx);
1651    } else {
1652       /* interpolate R, G, B, A */
1653       for (c = 0; c < TGSI_QUAD_SIZE; c++)
1654          rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1655                                              tx[0][c], tx[1][c],
1656                                              tx[2][c], tx[3][c]);
1657    }
1658 }
1659
1660
1661 static void
1662 img_filter_cube_array_linear(const struct sp_sampler_view *sp_sview,
1663                              const struct sp_sampler *sp_samp,
1664                              const struct img_filter_args *args,
1665                              float *rgba)
1666 {
1667    const struct pipe_resource *texture = sp_sview->base.texture;
1668    const int width = u_minify(texture->width0, args->level);
1669    const int height = u_minify(texture->height0, args->level);
1670    const int layer =
1671       coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
1672                      sp_sview->base.u.tex.first_layer,
1673                      sp_sview->base.u.tex.last_layer - 5);
1674    int x0, y0, x1, y1;
1675    float xw, yw; /* weights */
1676    union tex_tile_address addr;
1677    const float *tx[4];
1678    float corner0[TGSI_QUAD_SIZE], corner1[TGSI_QUAD_SIZE],
1679          corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
1680    int c;
1681
1682    assert(width > 0);
1683    assert(height > 0);
1684
1685    addr.value = 0;
1686    addr.bits.level = args->level;
1687
1688    /*
1689     * For seamless if LINEAR filtering is done within a miplevel,
1690     * always apply wrap mode CLAMP_TO_BORDER.
1691     */
1692    if (sp_samp->base.seamless_cube_map) {
1693       /* Note this is a bit overkill, actual clamping is not required */
1694       wrap_linear_clamp_to_border(args->s, width, args->offset[0], &x0, &x1, &xw);
1695       wrap_linear_clamp_to_border(args->t, height, args->offset[1], &y0, &y1, &yw);
1696    } else {
1697       /* Would probably make sense to ignore mode and just do edge clamp */
1698       sp_samp->linear_texcoord_s(args->s, width,  args->offset[0], &x0, &x1, &xw);
1699       sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
1700    }
1701
1702    if (sp_samp->base.seamless_cube_map) {
1703       tx[0] = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id);
1704       tx[1] = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id);
1705       tx[2] = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, args->face_id);
1706       tx[3] = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, args->face_id);
1707    } else {
1708       tx[0] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + args->face_id);
1709       tx[1] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + args->face_id);
1710       tx[2] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + args->face_id);
1711       tx[3] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id);
1712    }
1713
1714    if (args->gather_only) {
1715       for (c = 0; c < TGSI_QUAD_SIZE; c++)
1716          rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
1717                                                       args->gather_comp,
1718                                                       tx);
1719    } else {
1720       /* interpolate R, G, B, A */
1721       for (c = 0; c < TGSI_QUAD_SIZE; c++)
1722          rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1723                                              tx[0][c], tx[1][c],
1724                                              tx[2][c], tx[3][c]);
1725    }
1726 }
1727
1728 static void
1729 img_filter_3d_linear(const struct sp_sampler_view *sp_sview,
1730                      const struct sp_sampler *sp_samp,
1731                      const struct img_filter_args *args,
1732                      float *rgba)
1733 {
1734    const struct pipe_resource *texture = sp_sview->base.texture;
1735    const int width = u_minify(texture->width0, args->level);
1736    const int height = u_minify(texture->height0, args->level);
1737    const int depth = u_minify(texture->depth0, args->level);
1738    int x0, x1, y0, y1, z0, z1;
1739    float xw, yw, zw; /* interpolation weights */
1740    union tex_tile_address addr;
1741    const float *tx00, *tx01, *tx02, *tx03, *tx10, *tx11, *tx12, *tx13;
1742    int c;
1743
1744    addr.value = 0;
1745    addr.bits.level = args->level;
1746
1747    assert(width > 0);
1748    assert(height > 0);
1749    assert(depth > 0);
1750
1751    sp_samp->linear_texcoord_s(args->s, width,  args->offset[0], &x0, &x1, &xw);
1752    sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
1753    sp_samp->linear_texcoord_p(args->p, depth,  args->offset[2], &z0, &z1, &zw);
1754
1755    tx00 = get_texel_3d(sp_sview, sp_samp, addr, x0, y0, z0);
1756    tx01 = get_texel_3d(sp_sview, sp_samp, addr, x1, y0, z0);
1757    tx02 = get_texel_3d(sp_sview, sp_samp, addr, x0, y1, z0);
1758    tx03 = get_texel_3d(sp_sview, sp_samp, addr, x1, y1, z0);
1759
1760    tx10 = get_texel_3d(sp_sview, sp_samp, addr, x0, y0, z1);
1761    tx11 = get_texel_3d(sp_sview, sp_samp, addr, x1, y0, z1);
1762    tx12 = get_texel_3d(sp_sview, sp_samp, addr, x0, y1, z1);
1763    tx13 = get_texel_3d(sp_sview, sp_samp, addr, x1, y1, z1);
1764
1765       /* interpolate R, G, B, A */
1766    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1767       rgba[TGSI_NUM_CHANNELS*c] =  lerp_3d(xw, yw, zw,
1768                                            tx00[c], tx01[c],
1769                                            tx02[c], tx03[c],
1770                                            tx10[c], tx11[c],
1771                                            tx12[c], tx13[c]);
1772 }
1773
1774
1775 /* Calculate level of detail for every fragment,
1776  * with lambda already computed.
1777  * Note that lambda has already been biased by global LOD bias.
1778  * \param biased_lambda per-quad lambda.
1779  * \param lod_in per-fragment lod_bias or explicit_lod.
1780  * \param lod returns the per-fragment lod.
1781  */
1782 static inline void
1783 compute_lod(const struct pipe_sampler_state *sampler,
1784             enum tgsi_sampler_control control,
1785             const float biased_lambda,
1786             const float lod_in[TGSI_QUAD_SIZE],
1787             float lod[TGSI_QUAD_SIZE])
1788 {
1789    const float min_lod = sampler->min_lod;
1790    const float max_lod = sampler->max_lod;
1791    uint i;
1792
1793    switch (control) {
1794    case TGSI_SAMPLER_LOD_NONE:
1795    case TGSI_SAMPLER_LOD_ZERO:
1796    /* XXX FIXME */
1797    case TGSI_SAMPLER_DERIVS_EXPLICIT:
1798       lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(biased_lambda, min_lod, max_lod);
1799       break;
1800    case TGSI_SAMPLER_LOD_BIAS:
1801       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1802          lod[i] = biased_lambda + lod_in[i];
1803          lod[i] = CLAMP(lod[i], min_lod, max_lod);
1804       }
1805       break;
1806    case TGSI_SAMPLER_LOD_EXPLICIT:
1807       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1808          lod[i] = CLAMP(lod_in[i], min_lod, max_lod);
1809       }
1810       break;
1811    default:
1812       assert(0);
1813       lod[0] = lod[1] = lod[2] = lod[3] = 0.0f;
1814    }
1815 }
1816
1817
1818 /* Calculate level of detail for every fragment. The computed value is not
1819  * clamped to lod_min and lod_max.
1820  * \param lod_in per-fragment lod_bias or explicit_lod.
1821  * \param lod results per-fragment lod.
1822  */
1823 static inline void
1824 compute_lambda_lod_unclamped(const struct sp_sampler_view *sp_sview,
1825                              const struct sp_sampler *sp_samp,
1826                              const float s[TGSI_QUAD_SIZE],
1827                              const float t[TGSI_QUAD_SIZE],
1828                              const float p[TGSI_QUAD_SIZE],
1829                              const float lod_in[TGSI_QUAD_SIZE],
1830                              enum tgsi_sampler_control control,
1831                              float lod[TGSI_QUAD_SIZE])
1832 {
1833    const struct pipe_sampler_state *sampler = &sp_samp->base;
1834    const float lod_bias = sampler->lod_bias;
1835    float lambda;
1836    uint i;
1837
1838    switch (control) {
1839    case TGSI_SAMPLER_LOD_NONE:
1840       /* XXX FIXME */
1841    case TGSI_SAMPLER_DERIVS_EXPLICIT:
1842       lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
1843       lod[0] = lod[1] = lod[2] = lod[3] = lambda;
1844       break;
1845    case TGSI_SAMPLER_LOD_BIAS:
1846       lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
1847       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1848          lod[i] = lambda + lod_in[i];
1849       }
1850       break;
1851    case TGSI_SAMPLER_LOD_EXPLICIT:
1852       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1853          lod[i] = lod_in[i] + lod_bias;
1854       }
1855       break;
1856    case TGSI_SAMPLER_LOD_ZERO:
1857    case TGSI_SAMPLER_GATHER:
1858       lod[0] = lod[1] = lod[2] = lod[3] = lod_bias;
1859       break;
1860    default:
1861       assert(0);
1862       lod[0] = lod[1] = lod[2] = lod[3] = 0.0f;
1863    }
1864 }
1865
1866 /* Calculate level of detail for every fragment.
1867  * \param lod_in per-fragment lod_bias or explicit_lod.
1868  * \param lod results per-fragment lod.
1869  */
1870 static inline void
1871 compute_lambda_lod(const struct sp_sampler_view *sp_sview,
1872                    const struct sp_sampler *sp_samp,
1873                    const float s[TGSI_QUAD_SIZE],
1874                    const float t[TGSI_QUAD_SIZE],
1875                    const float p[TGSI_QUAD_SIZE],
1876                    const float lod_in[TGSI_QUAD_SIZE],
1877                    enum tgsi_sampler_control control,
1878                    float lod[TGSI_QUAD_SIZE])
1879 {
1880    const struct pipe_sampler_state *sampler = &sp_samp->base;
1881    const float min_lod = sampler->min_lod;
1882    const float max_lod = sampler->max_lod;
1883    int i;
1884
1885    compute_lambda_lod_unclamped(sp_sview, sp_samp,
1886                                 s, t, p, lod_in, control, lod);
1887    for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1888       lod[i] = CLAMP(lod[i], min_lod, max_lod);
1889    }
1890 }
1891
1892 static inline unsigned
1893 get_gather_component(const float lod_in[TGSI_QUAD_SIZE])
1894 {
1895    /* gather component is stored in lod_in slot as unsigned */
1896    return (*(unsigned int *)lod_in) & 0x3;
1897 }
1898
1899 /**
1900  * Clamps given lod to both lod limits and mip level limits. Clamping to the
1901  * latter limits is done so that lod is relative to the first (base) level.
1902  */
1903 static void
1904 clamp_lod(const struct sp_sampler_view *sp_sview,
1905           const struct sp_sampler *sp_samp,
1906           const float lod[TGSI_QUAD_SIZE],
1907           float clamped[TGSI_QUAD_SIZE])
1908 {
1909    const float min_lod = sp_samp->base.min_lod;
1910    const float max_lod = sp_samp->base.max_lod;
1911    const float min_level = sp_sview->base.u.tex.first_level;
1912    const float max_level = sp_sview->base.u.tex.last_level;
1913    int i;
1914
1915    for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1916       float cl = lod[i];
1917
1918       cl = CLAMP(cl, min_lod, max_lod);
1919       cl = CLAMP(cl, 0, max_level - min_level);
1920       clamped[i] = cl;
1921    }
1922 }
1923
1924 /**
1925  * Get mip level relative to base level for linear mip filter
1926  */
1927 static void
1928 mip_rel_level_linear(const struct sp_sampler_view *sp_sview,
1929                      const struct sp_sampler *sp_samp,
1930                      const float lod[TGSI_QUAD_SIZE],
1931                      float level[TGSI_QUAD_SIZE])
1932 {
1933    clamp_lod(sp_sview, sp_samp, lod, level);
1934 }
1935
1936 static void
1937 mip_filter_linear(const struct sp_sampler_view *sp_sview,
1938                   const struct sp_sampler *sp_samp,
1939                   img_filter_func min_filter,
1940                   img_filter_func mag_filter,
1941                   const float s[TGSI_QUAD_SIZE],
1942                   const float t[TGSI_QUAD_SIZE],
1943                   const float p[TGSI_QUAD_SIZE],
1944                   const float c0[TGSI_QUAD_SIZE],
1945                   const float lod_in[TGSI_QUAD_SIZE],
1946                   const struct filter_args *filt_args,
1947                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1948 {
1949    const struct pipe_sampler_view *psview = &sp_sview->base;
1950    int j;
1951    float lod[TGSI_QUAD_SIZE];
1952    struct img_filter_args args;
1953
1954    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
1955
1956    args.offset = filt_args->offset;
1957    args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
1958    args.gather_comp = get_gather_component(lod_in);
1959
1960    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1961       const int level0 = psview->u.tex.first_level + (int)lod[j];
1962
1963       args.s = s[j];
1964       args.t = t[j];
1965       args.p = p[j];
1966       args.face_id = filt_args->faces[j];
1967
1968       if (lod[j] < 0.0) {
1969          args.level = psview->u.tex.first_level;
1970          mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
1971       }
1972       else if (level0 >= (int) psview->u.tex.last_level) {
1973          args.level = psview->u.tex.last_level;
1974          min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
1975       }
1976       else {
1977          float levelBlend = frac(lod[j]);
1978          float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
1979          int c;
1980
1981          args.level = level0;
1982          min_filter(sp_sview, sp_samp, &args, &rgbax[0][0]);
1983          args.level = level0+1;
1984          min_filter(sp_sview, sp_samp, &args, &rgbax[0][1]);
1985
1986          for (c = 0; c < 4; c++) {
1987             rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
1988          }
1989       }
1990    }
1991
1992    if (DEBUG_TEX) {
1993       print_sample_4(__FUNCTION__, rgba);
1994    }
1995 }
1996
1997
1998 /**
1999  * Get mip level relative to base level for nearest mip filter
2000  */
2001 static void
2002 mip_rel_level_nearest(const struct sp_sampler_view *sp_sview,
2003                       const struct sp_sampler *sp_samp,
2004                       const float lod[TGSI_QUAD_SIZE],
2005                       float level[TGSI_QUAD_SIZE])
2006 {
2007    int j;
2008
2009    clamp_lod(sp_sview, sp_samp, lod, level);
2010    for (j = 0; j < TGSI_QUAD_SIZE; j++)
2011       /* TODO: It should rather be:
2012        * level[j] = ceil(level[j] + 0.5F) - 1.0F;
2013        */
2014       level[j] = (int)(level[j] + 0.5F);
2015 }
2016
2017 /**
2018  * Compute nearest mipmap level from texcoords.
2019  * Then sample the texture level for four elements of a quad.
2020  * \param c0  the LOD bias factors, or absolute LODs (depending on control)
2021  */
2022 static void
2023 mip_filter_nearest(const struct sp_sampler_view *sp_sview,
2024                    const struct sp_sampler *sp_samp,
2025                    img_filter_func min_filter,
2026                    img_filter_func mag_filter,
2027                    const float s[TGSI_QUAD_SIZE],
2028                    const float t[TGSI_QUAD_SIZE],
2029                    const float p[TGSI_QUAD_SIZE],
2030                    const float c0[TGSI_QUAD_SIZE],
2031                    const float lod_in[TGSI_QUAD_SIZE],
2032                    const struct filter_args *filt_args,
2033                    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2034 {
2035    const struct pipe_sampler_view *psview = &sp_sview->base;
2036    float lod[TGSI_QUAD_SIZE];
2037    int j;
2038    struct img_filter_args args;
2039
2040    args.offset = filt_args->offset;
2041    args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
2042    args.gather_comp = get_gather_component(lod_in);
2043
2044    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
2045
2046    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2047       args.s = s[j];
2048       args.t = t[j];
2049       args.p = p[j];
2050       args.face_id = filt_args->faces[j];
2051
2052       if (lod[j] < 0.0) {
2053          args.level = psview->u.tex.first_level;
2054          mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2055       } else {
2056          const int level = psview->u.tex.first_level + (int)(lod[j] + 0.5F);
2057          args.level = MIN2(level, (int)psview->u.tex.last_level);
2058          min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2059       }
2060    }
2061
2062    if (DEBUG_TEX) {
2063       print_sample_4(__FUNCTION__, rgba);
2064    }
2065 }
2066
2067
2068 /**
2069  * Get mip level relative to base level for none mip filter
2070  */
2071 static void
2072 mip_rel_level_none(const struct sp_sampler_view *sp_sview,
2073                    const struct sp_sampler *sp_samp,
2074                    const float lod[TGSI_QUAD_SIZE],
2075                    float level[TGSI_QUAD_SIZE])
2076 {
2077    int j;
2078
2079    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2080       level[j] = 0;
2081    }
2082 }
2083
2084 static void
2085 mip_filter_none(const struct sp_sampler_view *sp_sview,
2086                 const struct sp_sampler *sp_samp,
2087                 img_filter_func min_filter,
2088                 img_filter_func mag_filter,
2089                 const float s[TGSI_QUAD_SIZE],
2090                 const float t[TGSI_QUAD_SIZE],
2091                 const float p[TGSI_QUAD_SIZE],
2092                 const float c0[TGSI_QUAD_SIZE],
2093                 const float lod_in[TGSI_QUAD_SIZE],
2094                 const struct filter_args *filt_args,
2095                 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2096 {
2097    float lod[TGSI_QUAD_SIZE];
2098    int j;
2099    struct img_filter_args args;
2100
2101    args.level = sp_sview->base.u.tex.first_level;
2102    args.offset = filt_args->offset;
2103    args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
2104
2105    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
2106
2107    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2108       args.s = s[j];
2109       args.t = t[j];
2110       args.p = p[j];
2111       args.face_id = filt_args->faces[j];
2112       if (lod[j] < 0.0) {
2113          mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2114       }
2115       else {
2116          min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2117       }
2118    }
2119 }
2120
2121
2122 /**
2123  * Get mip level relative to base level for none mip filter
2124  */
2125 static void
2126 mip_rel_level_none_no_filter_select(const struct sp_sampler_view *sp_sview,
2127                                     const struct sp_sampler *sp_samp,
2128                                     const float lod[TGSI_QUAD_SIZE],
2129                                     float level[TGSI_QUAD_SIZE])
2130 {
2131    mip_rel_level_none(sp_sview, sp_samp, lod, level);
2132 }
2133
2134 static void
2135 mip_filter_none_no_filter_select(const struct sp_sampler_view *sp_sview,
2136                                  const struct sp_sampler *sp_samp,
2137                                  img_filter_func min_filter,
2138                                  img_filter_func mag_filter,
2139                                  const float s[TGSI_QUAD_SIZE],
2140                                  const float t[TGSI_QUAD_SIZE],
2141                                  const float p[TGSI_QUAD_SIZE],
2142                                  const float c0[TGSI_QUAD_SIZE],
2143                                  const float lod_in[TGSI_QUAD_SIZE],
2144                                  const struct filter_args *filt_args,
2145                                  float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2146 {
2147    int j;
2148    struct img_filter_args args;
2149    args.level = sp_sview->base.u.tex.first_level;
2150    args.offset = filt_args->offset;
2151    args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
2152    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2153       args.s = s[j];
2154       args.t = t[j];
2155       args.p = p[j];
2156       args.face_id = filt_args->faces[j];
2157       mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2158    }
2159 }
2160
2161
/* For anisotropic filtering */
#define WEIGHT_LUT_SIZE 1024

/* EWA weight table; NULL until create_filter_table() has built it. */
static const float *weightLut = NULL;

/**
 * Creates the look-up table used to speed-up EWA sampling.
 *
 * Entry i holds exp(-2 * r2) with r2 = i / (WEIGHT_LUT_SIZE - 1), so the
 * table covers r2 in [0, 1].  The table is built only while weightLut is
 * still NULL; once published, further calls do nothing.
 *
 * If the allocation fails nothing is written and weightLut stays NULL, so
 * a later call tries again.  (MALLOC is assumed to signal failure by
 * returning NULL.)
 */
static void
create_filter_table(void)
{
   float *lut;
   unsigned i;

   if (weightLut)
      return;

   lut = (float *) MALLOC(WEIGHT_LUT_SIZE * sizeof(float));
   if (!lut)
      return;

   for (i = 0; i < WEIGHT_LUT_SIZE; ++i) {
      const float alpha = 2;
      const float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
      const float weight = (float) exp(-alpha * r2);
      lut[i] = weight;
   }

   /* publish only the fully initialized table */
   weightLut = lut;
}
2186
2187
2188 /**
2189  * Elliptical weighted average (EWA) filter for producing high quality
2190  * anisotropic filtered results.
2191  * Based on the Higher Quality Elliptical Weighted Average Filter
2192  * published by Paul S. Heckbert in his Master's Thesis
2193  * "Fundamentals of Texture Mapping and Image Warping" (1989)
2194  */
2195 static void
2196 img_filter_2d_ewa(const struct sp_sampler_view *sp_sview,
2197                   const struct sp_sampler *sp_samp,
2198                   img_filter_func min_filter,
2199                   img_filter_func mag_filter,
2200                   const float s[TGSI_QUAD_SIZE],
2201                   const float t[TGSI_QUAD_SIZE],
2202                   const float p[TGSI_QUAD_SIZE],
2203                   const uint faces[TGSI_QUAD_SIZE],
2204                   unsigned level,
2205                   const float dudx, const float dvdx,
2206                   const float dudy, const float dvdy,
2207                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2208 {
2209    const struct pipe_resource *texture = sp_sview->base.texture;
2210
2211    // ??? Won't the image filters blow up if level is negative?
2212    const unsigned level0 = level > 0 ? level : 0;
2213    const float scaling = 1.0f / (1 << level0);
2214    const int width = u_minify(texture->width0, level0);
2215    const int height = u_minify(texture->height0, level0);
2216    struct img_filter_args args;
2217    const float ux = dudx * scaling;
2218    const float vx = dvdx * scaling;
2219    const float uy = dudy * scaling;
2220    const float vy = dvdy * scaling;
2221
2222    /* compute ellipse coefficients to bound the region:
2223     * A*x*x + B*x*y + C*y*y = F.
2224     */
2225    float A = vx*vx+vy*vy+1;
2226    float B = -2*(ux*vx+uy*vy);
2227    float C = ux*ux+uy*uy+1;
2228    float F = A*C-B*B/4.0f;
2229
2230    /* check if it is an ellipse */
2231    /* assert(F > 0.0); */
2232
2233    /* Compute the ellipse's (u,v) bounding box in texture space */
2234    const float d = -B*B+4.0f*C*A;
2235    const float box_u = 2.0f / d * sqrtf(d*C*F); /* box_u -> half of bbox with   */
2236    const float box_v = 2.0f / d * sqrtf(A*d*F); /* box_v -> half of bbox height */
2237
2238    float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2239    float s_buffer[TGSI_QUAD_SIZE];
2240    float t_buffer[TGSI_QUAD_SIZE];
2241    float weight_buffer[TGSI_QUAD_SIZE];
2242    int j;
2243
2244    /* For each quad, the du and dx values are the same and so the ellipse is
2245     * also the same. Note that texel/image access can only be performed using
2246     * a quad, i.e. it is not possible to get the pixel value for a single
2247     * tex coord. In order to have a better performance, the access is buffered
2248     * using the s_buffer/t_buffer and weight_buffer. Only when the buffer is
2249     * full, then the pixel values are read from the image.
2250     */
2251    const float ddq = 2 * A;
2252
2253    /* Scale ellipse formula to directly index the Filter Lookup Table.
2254     * i.e. scale so that F = WEIGHT_LUT_SIZE-1
2255     */
2256    const double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F;
2257    A *= formScale;
2258    B *= formScale;
2259    C *= formScale;
2260    /* F *= formScale; */ /* no need to scale F as we don't use it below here */
2261
2262    args.level = level;
2263    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2264       /* Heckbert MS thesis, p. 59; scan over the bounding box of the ellipse
2265        * and incrementally update the value of Ax^2+Bxy*Cy^2; when this
2266        * value, q, is less than F, we're inside the ellipse
2267        */
2268       const float tex_u = -0.5F + s[j] * texture->width0 * scaling;
2269       const float tex_v = -0.5F + t[j] * texture->height0 * scaling;
2270
2271       const int u0 = (int) floorf(tex_u - box_u);
2272       const int u1 = (int) ceilf(tex_u + box_u);
2273       const int v0 = (int) floorf(tex_v - box_v);
2274       const int v1 = (int) ceilf(tex_v + box_v);
2275       const float U = u0 - tex_u;
2276
2277       float num[4] = {0.0F, 0.0F, 0.0F, 0.0F};
2278       unsigned buffer_next = 0;
2279       float den = 0;
2280       int v;
2281       args.face_id = faces[j];
2282
2283       for (v = v0; v <= v1; ++v) {
2284          const float V = v - tex_v;
2285          float dq = A * (2 * U + 1) + B * V;
2286          float q = (C * V + B * U) * V + A * U * U;
2287
2288          int u;
2289          for (u = u0; u <= u1; ++u) {
2290             /* Note that the ellipse has been pre-scaled so F =
2291              * WEIGHT_LUT_SIZE - 1
2292              */
2293             if (q < WEIGHT_LUT_SIZE) {
2294                /* as a LUT is used, q must never be negative;
2295                 * should not happen, though
2296                 */
2297                const int qClamped = q >= 0.0F ? q : 0;
2298                const float weight = weightLut[qClamped];
2299
2300                weight_buffer[buffer_next] = weight;
2301                s_buffer[buffer_next] = u / ((float) width);
2302                t_buffer[buffer_next] = v / ((float) height);
2303
2304                buffer_next++;
2305                if (buffer_next == TGSI_QUAD_SIZE) {
2306                   /* 4 texel coords are in the buffer -> read it now */
2307                   unsigned jj;
2308                   /* it is assumed that samp->min_img_filter is set to
2309                    * img_filter_2d_nearest or one of the
2310                    * accelerated img_filter_2d_nearest_XXX functions.
2311                    */
2312                   for (jj = 0; jj < buffer_next; jj++) {
2313                      args.s = s_buffer[jj];
2314                      args.t = t_buffer[jj];
2315                      args.p = p[jj];
2316                      min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][jj]);
2317                      num[0] += weight_buffer[jj] * rgba_temp[0][jj];
2318                      num[1] += weight_buffer[jj] * rgba_temp[1][jj];
2319                      num[2] += weight_buffer[jj] * rgba_temp[2][jj];
2320                      num[3] += weight_buffer[jj] * rgba_temp[3][jj];
2321                   }
2322
2323                   buffer_next = 0;
2324                }
2325
2326                den += weight;
2327             }
2328             q += dq;
2329             dq += ddq;
2330          }
2331       }
2332
2333       /* if the tex coord buffer contains unread values, we will read
2334        * them now.
2335        */
2336       if (buffer_next > 0) {
2337          unsigned jj;
2338          /* it is assumed that samp->min_img_filter is set to
2339           * img_filter_2d_nearest or one of the
2340           * accelerated img_filter_2d_nearest_XXX functions.
2341           */
2342          for (jj = 0; jj < buffer_next; jj++) {
2343             args.s = s_buffer[jj];
2344             args.t = t_buffer[jj];
2345             args.p = p[jj];
2346             min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][jj]);
2347             num[0] += weight_buffer[jj] * rgba_temp[0][jj];
2348             num[1] += weight_buffer[jj] * rgba_temp[1][jj];
2349             num[2] += weight_buffer[jj] * rgba_temp[2][jj];
2350             num[3] += weight_buffer[jj] * rgba_temp[3][jj];
2351          }
2352       }
2353
2354       if (den <= 0.0F) {
2355          /* Reaching this place would mean that no pixels intersected
2356           * the ellipse.  This should never happen because the filter
2357           * we use always intersects at least one pixel.
2358           */
2359
2360          /*rgba[0]=0;
2361          rgba[1]=0;
2362          rgba[2]=0;
2363          rgba[3]=0;*/
2364          /* not enough pixels in resampling, resort to direct interpolation */
2365          args.s = s[j];
2366          args.t = t[j];
2367          args.p = p[j];
2368          min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][j]);
2369          den = 1;
2370          num[0] = rgba_temp[0][j];
2371          num[1] = rgba_temp[1][j];
2372          num[2] = rgba_temp[2][j];
2373          num[3] = rgba_temp[3][j];
2374       }
2375
2376       rgba[0][j] = num[0] / den;
2377       rgba[1][j] = num[1] / den;
2378       rgba[2][j] = num[2] / den;
2379       rgba[3][j] = num[3] / den;
2380    }
2381 }
2382
2383
2384 /**
2385  * Get mip level relative to base level for linear mip filter
2386  */
2387 static void
2388 mip_rel_level_linear_aniso(const struct sp_sampler_view *sp_sview,
2389                            const struct sp_sampler *sp_samp,
2390                            const float lod[TGSI_QUAD_SIZE],
2391                            float level[TGSI_QUAD_SIZE])
2392 {
2393    mip_rel_level_linear(sp_sview, sp_samp, lod, level);
2394 }
2395
2396 /**
2397  * Sample 2D texture using an anisotropic filter.
2398  */
2399 static void
2400 mip_filter_linear_aniso(const struct sp_sampler_view *sp_sview,
2401                         const struct sp_sampler *sp_samp,
2402                         img_filter_func min_filter,
2403                         img_filter_func mag_filter,
2404                         const float s[TGSI_QUAD_SIZE],
2405                         const float t[TGSI_QUAD_SIZE],
2406                         const float p[TGSI_QUAD_SIZE],
2407                         const float c0[TGSI_QUAD_SIZE],
2408                         const float lod_in[TGSI_QUAD_SIZE],
2409                         const struct filter_args *filt_args,
2410                         float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2411 {
2412    const struct pipe_resource *texture = sp_sview->base.texture;
2413    const struct pipe_sampler_view *psview = &sp_sview->base;
2414    int level0;
2415    float lambda;
2416    float lod[TGSI_QUAD_SIZE];
2417
2418    const float s_to_u = u_minify(texture->width0, psview->u.tex.first_level);
2419    const float t_to_v = u_minify(texture->height0, psview->u.tex.first_level);
2420    const float dudx = (s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
2421    const float dudy = (s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]) * s_to_u;
2422    const float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
2423    const float dvdy = (t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]) * t_to_v;
2424    struct img_filter_args args;
2425
2426    if (filt_args->control == TGSI_SAMPLER_LOD_BIAS ||
2427        filt_args->control == TGSI_SAMPLER_LOD_NONE ||
2428        /* XXX FIXME */
2429        filt_args->control == TGSI_SAMPLER_DERIVS_EXPLICIT) {
2430       /* note: instead of working with Px and Py, we will use the
2431        * squared length instead, to avoid sqrt.
2432        */
2433       const float Px2 = dudx * dudx + dvdx * dvdx;
2434       const float Py2 = dudy * dudy + dvdy * dvdy;
2435
2436       float Pmax2;
2437       float Pmin2;
2438       float e;
2439       const float maxEccentricity = sp_samp->base.max_anisotropy * sp_samp->base.max_anisotropy;
2440
2441       if (Px2 < Py2) {
2442          Pmax2 = Py2;
2443          Pmin2 = Px2;
2444       }
2445       else {
2446          Pmax2 = Px2;
2447          Pmin2 = Py2;
2448       }
2449
2450       /* if the eccentricity of the ellipse is too big, scale up the shorter
2451        * of the two vectors to limit the maximum amount of work per pixel
2452        */
2453       e = Pmax2 / Pmin2;
2454       if (e > maxEccentricity) {
2455          /* float s=e / maxEccentricity;
2456             minor[0] *= s;
2457             minor[1] *= s;
2458             Pmin2 *= s; */
2459          Pmin2 = Pmax2 / maxEccentricity;
2460       }
2461
2462       /* note: we need to have Pmin=sqrt(Pmin2) here, but we can avoid
2463        * this since 0.5*log(x) = log(sqrt(x))
2464        */
2465       lambda = 0.5F * util_fast_log2(Pmin2) + sp_samp->base.lod_bias;
2466       compute_lod(&sp_samp->base, filt_args->control, lambda, lod_in, lod);
2467    }
2468    else {
2469       assert(filt_args->control == TGSI_SAMPLER_LOD_EXPLICIT ||
2470              filt_args->control == TGSI_SAMPLER_LOD_ZERO);
2471       compute_lod(&sp_samp->base, filt_args->control, sp_samp->base.lod_bias, lod_in, lod);
2472    }
2473
2474    /* XXX: Take into account all lod values.
2475     */
2476    lambda = lod[0];
2477    level0 = psview->u.tex.first_level + (int)lambda;
2478
2479    /* If the ellipse covers the whole image, we can
2480     * simply return the average of the whole image.
2481     */
2482    if (level0 >= (int) psview->u.tex.last_level) {
2483       int j;
2484       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2485          args.s = s[j];
2486          args.t = t[j];
2487          args.p = p[j];
2488          args.level = psview->u.tex.last_level;
2489          args.face_id = filt_args->faces[j];
2490          min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2491       }
2492    }
2493    else {
2494       /* don't bother interpolating between multiple LODs; it doesn't
2495        * seem to be worth the extra running time.
2496        */
2497       img_filter_2d_ewa(sp_sview, sp_samp, min_filter, mag_filter,
2498                         s, t, p, filt_args->faces, level0,
2499                         dudx, dvdx, dudy, dvdy, rgba);
2500    }
2501
2502    if (DEBUG_TEX) {
2503       print_sample_4(__FUNCTION__, rgba);
2504    }
2505 }
2506
2507 /**
2508  * Get mip level relative to base level for linear mip filter
2509  */
2510 static void
2511 mip_rel_level_linear_2d_linear_repeat_POT(
2512    const struct sp_sampler_view *sp_sview,
2513    const struct sp_sampler *sp_samp,
2514    const float lod[TGSI_QUAD_SIZE],
2515    float level[TGSI_QUAD_SIZE])
2516 {
2517    mip_rel_level_linear(sp_sview, sp_samp, lod, level);
2518 }
2519
2520 /**
2521  * Specialized version of mip_filter_linear with hard-wired calls to
2522  * 2d lambda calculation and 2d_linear_repeat_POT img filters.
2523  */
2524 static void
2525 mip_filter_linear_2d_linear_repeat_POT(
2526    const struct sp_sampler_view *sp_sview,
2527    const struct sp_sampler *sp_samp,
2528    img_filter_func min_filter,
2529    img_filter_func mag_filter,
2530    const float s[TGSI_QUAD_SIZE],
2531    const float t[TGSI_QUAD_SIZE],
2532    const float p[TGSI_QUAD_SIZE],
2533    const float c0[TGSI_QUAD_SIZE],
2534    const float lod_in[TGSI_QUAD_SIZE],
2535    const struct filter_args *filt_args,
2536    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2537 {
2538    const struct pipe_sampler_view *psview = &sp_sview->base;
2539    int j;
2540    float lod[TGSI_QUAD_SIZE];
2541
2542    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
2543
2544    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2545       const int level0 = psview->u.tex.first_level + (int)lod[j];
2546       struct img_filter_args args;
2547       /* Catches both negative and large values of level0:
2548        */
2549       args.s = s[j];
2550       args.t = t[j];
2551       args.p = p[j];
2552       args.face_id = filt_args->faces[j];
2553       args.offset = filt_args->offset;
2554       args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
2555       if ((unsigned)level0 >= psview->u.tex.last_level) {
2556          if (level0 < 0)
2557             args.level = psview->u.tex.first_level;
2558          else
2559             args.level = psview->u.tex.last_level;
2560          img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args,
2561                                          &rgba[0][j]);
2562
2563       }
2564       else {
2565          const float levelBlend = frac(lod[j]);
2566          float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2567          int c;
2568
2569          args.level = level0;
2570          img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args, &rgbax[0][0]);
2571          args.level = level0+1;
2572          img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args, &rgbax[0][1]);
2573
2574          for (c = 0; c < TGSI_NUM_CHANNELS; c++)
2575             rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
2576       }
2577    }
2578
2579    if (DEBUG_TEX) {
2580       print_sample_4(__FUNCTION__, rgba);
2581    }
2582 }
2583
2584 static const struct sp_filter_funcs funcs_linear = {
2585    mip_rel_level_linear,
2586    mip_filter_linear
2587 };
2588
2589 static const struct sp_filter_funcs funcs_nearest = {
2590    mip_rel_level_nearest,
2591    mip_filter_nearest
2592 };
2593
2594 static const struct sp_filter_funcs funcs_none = {
2595    mip_rel_level_none,
2596    mip_filter_none
2597 };
2598
2599 static const struct sp_filter_funcs funcs_none_no_filter_select = {
2600    mip_rel_level_none_no_filter_select,
2601    mip_filter_none_no_filter_select
2602 };
2603
2604 static const struct sp_filter_funcs funcs_linear_aniso = {
2605    mip_rel_level_linear_aniso,
2606    mip_filter_linear_aniso
2607 };
2608
2609 static const struct sp_filter_funcs funcs_linear_2d_linear_repeat_POT = {
2610    mip_rel_level_linear_2d_linear_repeat_POT,
2611    mip_filter_linear_2d_linear_repeat_POT
2612 };
2613
2614 /**
2615  * Do shadow/depth comparisons.
2616  */
2617 static void
2618 sample_compare(const struct sp_sampler_view *sp_sview,
2619                const struct sp_sampler *sp_samp,
2620                const float s[TGSI_QUAD_SIZE],
2621                const float t[TGSI_QUAD_SIZE],
2622                const float p[TGSI_QUAD_SIZE],
2623                const float c0[TGSI_QUAD_SIZE],
2624                const float c1[TGSI_QUAD_SIZE],
2625                enum tgsi_sampler_control control,
2626                float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2627 {
2628    const struct pipe_sampler_state *sampler = &sp_samp->base;
2629    int j, v;
2630    int k[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2631    float pc[4];
2632    const struct util_format_description *format_desc =
2633       util_format_description(sp_sview->base.format);
2634    /* not entirely sure we couldn't end up with non-valid swizzle here */
2635    const unsigned chan_type =
2636       format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
2637       format_desc->channel[format_desc->swizzle[0]].type :
2638       UTIL_FORMAT_TYPE_FLOAT;
2639    const bool is_gather = (control == TGSI_SAMPLER_GATHER);
2640
2641    /**
2642     * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
2643     * for 2D Array texture we need to use the 'c0' (aka Q).
2644     * When we sampled the depth texture, the depth value was put into all
2645     * RGBA channels.  We look at the red channel here.
2646     */
2647
2648    if (sp_sview->base.target == PIPE_TEXTURE_2D_ARRAY ||
2649        sp_sview->base.target == PIPE_TEXTURE_CUBE) {
2650       pc[0] = c0[0];
2651       pc[1] = c0[1];
2652       pc[2] = c0[2];
2653       pc[3] = c0[3];
2654    } else if (sp_sview->base.target == PIPE_TEXTURE_CUBE_ARRAY) {
2655       pc[0] = c1[0];
2656       pc[1] = c1[1];
2657       pc[2] = c1[2];
2658       pc[3] = c1[3];
2659    } else {
2660       pc[0] = p[0];
2661       pc[1] = p[1];
2662       pc[2] = p[2];
2663       pc[3] = p[3];
2664    }
2665
2666    if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
2667       /*
2668        * clamping is a result of conversion to texture format, hence
2669        * doesn't happen with floats. Technically also should do comparison
2670        * in texture format (quantization!).
2671        */
2672       pc[0] = CLAMP(pc[0], 0.0F, 1.0F);
2673       pc[1] = CLAMP(pc[1], 0.0F, 1.0F);
2674       pc[2] = CLAMP(pc[2], 0.0F, 1.0F);
2675       pc[3] = CLAMP(pc[3], 0.0F, 1.0F);
2676    }
2677
2678    for (v = 0; v < (is_gather ? TGSI_NUM_CHANNELS : 1); v++) {
2679       /* compare four texcoords vs. four texture samples */
2680       switch (sampler->compare_func) {
2681       case PIPE_FUNC_LESS:
2682          k[v][0] = pc[0] < rgba[v][0];
2683          k[v][1] = pc[1] < rgba[v][1];
2684          k[v][2] = pc[2] < rgba[v][2];
2685          k[v][3] = pc[3] < rgba[v][3];
2686          break;
2687       case PIPE_FUNC_LEQUAL:
2688          k[v][0] = pc[0] <= rgba[v][0];
2689          k[v][1] = pc[1] <= rgba[v][1];
2690          k[v][2] = pc[2] <= rgba[v][2];
2691          k[v][3] = pc[3] <= rgba[v][3];
2692          break;
2693       case PIPE_FUNC_GREATER:
2694          k[v][0] = pc[0] > rgba[v][0];
2695          k[v][1] = pc[1] > rgba[v][1];
2696          k[v][2] = pc[2] > rgba[v][2];
2697          k[v][3] = pc[3] > rgba[v][3];
2698          break;
2699       case PIPE_FUNC_GEQUAL:
2700          k[v][0] = pc[0] >= rgba[v][0];
2701          k[v][1] = pc[1] >= rgba[v][1];
2702          k[v][2] = pc[2] >= rgba[v][2];
2703          k[v][3] = pc[3] >= rgba[v][3];
2704          break;
2705       case PIPE_FUNC_EQUAL:
2706          k[v][0] = pc[0] == rgba[v][0];
2707          k[v][1] = pc[1] == rgba[v][1];
2708          k[v][2] = pc[2] == rgba[v][2];
2709          k[v][3] = pc[3] == rgba[v][3];
2710          break;
2711       case PIPE_FUNC_NOTEQUAL:
2712          k[v][0] = pc[0] != rgba[v][0];
2713          k[v][1] = pc[1] != rgba[v][1];
2714          k[v][2] = pc[2] != rgba[v][2];
2715          k[v][3] = pc[3] != rgba[v][3];
2716          break;
2717       case PIPE_FUNC_ALWAYS:
2718          k[v][0] = k[v][1] = k[v][2] = k[v][3] = 1;
2719          break;
2720       case PIPE_FUNC_NEVER:
2721          k[v][0] = k[v][1] = k[v][2] = k[v][3] = 0;
2722          break;
2723       default:
2724          k[v][0] = k[v][1] = k[v][2] = k[v][3] = 0;
2725          assert(0);
2726          break;
2727       }
2728    }
2729
2730    if (is_gather) {
2731       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2732          for (v = 0; v < TGSI_NUM_CHANNELS; v++) {
2733             rgba[v][j] = k[v][j];
2734          }
2735       }
2736    } else {
2737       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2738          rgba[0][j] = k[0][j];
2739          rgba[1][j] = k[0][j];
2740          rgba[2][j] = k[0][j];
2741          rgba[3][j] = 1.0F;
2742       }
2743    }
2744 }
2745
2746 static void
2747 do_swizzling(const struct pipe_sampler_view *sview,
2748              float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
2749              float out[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2750 {
2751    int j;
2752    const unsigned swizzle_r = sview->swizzle_r;
2753    const unsigned swizzle_g = sview->swizzle_g;
2754    const unsigned swizzle_b = sview->swizzle_b;
2755    const unsigned swizzle_a = sview->swizzle_a;
2756
2757    switch (swizzle_r) {
2758    case PIPE_SWIZZLE_ZERO:
2759       for (j = 0; j < 4; j++)
2760          out[0][j] = 0.0f;
2761       break;
2762    case PIPE_SWIZZLE_ONE:
2763       for (j = 0; j < 4; j++)
2764          out[0][j] = 1.0f;
2765       break;
2766    default:
2767       assert(swizzle_r < 4);
2768       for (j = 0; j < 4; j++)
2769          out[0][j] = in[swizzle_r][j];
2770    }
2771
2772    switch (swizzle_g) {
2773    case PIPE_SWIZZLE_ZERO:
2774       for (j = 0; j < 4; j++)
2775          out[1][j] = 0.0f;
2776       break;
2777    case PIPE_SWIZZLE_ONE:
2778       for (j = 0; j < 4; j++)
2779          out[1][j] = 1.0f;
2780       break;
2781    default:
2782       assert(swizzle_g < 4);
2783       for (j = 0; j < 4; j++)
2784          out[1][j] = in[swizzle_g][j];
2785    }
2786
2787    switch (swizzle_b) {
2788    case PIPE_SWIZZLE_ZERO:
2789       for (j = 0; j < 4; j++)
2790          out[2][j] = 0.0f;
2791       break;
2792    case PIPE_SWIZZLE_ONE:
2793       for (j = 0; j < 4; j++)
2794          out[2][j] = 1.0f;
2795       break;
2796    default:
2797       assert(swizzle_b < 4);
2798       for (j = 0; j < 4; j++)
2799          out[2][j] = in[swizzle_b][j];
2800    }
2801
2802    switch (swizzle_a) {
2803    case PIPE_SWIZZLE_ZERO:
2804       for (j = 0; j < 4; j++)
2805          out[3][j] = 0.0f;
2806       break;
2807    case PIPE_SWIZZLE_ONE:
2808       for (j = 0; j < 4; j++)
2809          out[3][j] = 1.0f;
2810       break;
2811    default:
2812       assert(swizzle_a < 4);
2813       for (j = 0; j < 4; j++)
2814          out[3][j] = in[swizzle_a][j];
2815    }
2816 }
2817
2818
2819 static wrap_nearest_func
2820 get_nearest_unorm_wrap(unsigned mode)
2821 {
2822    switch (mode) {
2823    case PIPE_TEX_WRAP_CLAMP:
2824       return wrap_nearest_unorm_clamp;
2825    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2826       return wrap_nearest_unorm_clamp_to_edge;
2827    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2828       return wrap_nearest_unorm_clamp_to_border;
2829    default:
2830       debug_printf("illegal wrap mode %d with non-normalized coords\n", mode);
2831       return wrap_nearest_unorm_clamp;
2832    }
2833 }
2834
2835
2836 static wrap_nearest_func
2837 get_nearest_wrap(unsigned mode)
2838 {
2839    switch (mode) {
2840    case PIPE_TEX_WRAP_REPEAT:
2841       return wrap_nearest_repeat;
2842    case PIPE_TEX_WRAP_CLAMP:
2843       return wrap_nearest_clamp;
2844    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2845       return wrap_nearest_clamp_to_edge;
2846    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2847       return wrap_nearest_clamp_to_border;
2848    case PIPE_TEX_WRAP_MIRROR_REPEAT:
2849       return wrap_nearest_mirror_repeat;
2850    case PIPE_TEX_WRAP_MIRROR_CLAMP:
2851       return wrap_nearest_mirror_clamp;
2852    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
2853       return wrap_nearest_mirror_clamp_to_edge;
2854    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2855       return wrap_nearest_mirror_clamp_to_border;
2856    default:
2857       assert(0);
2858       return wrap_nearest_repeat;
2859    }
2860 }
2861
2862
2863 static wrap_linear_func
2864 get_linear_unorm_wrap(unsigned mode)
2865 {
2866    switch (mode) {
2867    case PIPE_TEX_WRAP_CLAMP:
2868       return wrap_linear_unorm_clamp;
2869    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2870       return wrap_linear_unorm_clamp_to_edge;
2871    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2872       return wrap_linear_unorm_clamp_to_border;
2873    default:
2874       debug_printf("illegal wrap mode %d with non-normalized coords\n", mode);
2875       return wrap_linear_unorm_clamp;
2876    }
2877 }
2878
2879
2880 static wrap_linear_func
2881 get_linear_wrap(unsigned mode)
2882 {
2883    switch (mode) {
2884    case PIPE_TEX_WRAP_REPEAT:
2885       return wrap_linear_repeat;
2886    case PIPE_TEX_WRAP_CLAMP:
2887       return wrap_linear_clamp;
2888    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2889       return wrap_linear_clamp_to_edge;
2890    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2891       return wrap_linear_clamp_to_border;
2892    case PIPE_TEX_WRAP_MIRROR_REPEAT:
2893       return wrap_linear_mirror_repeat;
2894    case PIPE_TEX_WRAP_MIRROR_CLAMP:
2895       return wrap_linear_mirror_clamp;
2896    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
2897       return wrap_linear_mirror_clamp_to_edge;
2898    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2899       return wrap_linear_mirror_clamp_to_border;
2900    default:
2901       assert(0);
2902       return wrap_linear_repeat;
2903    }
2904 }
2905
2906
2907 /**
2908  * Is swizzling needed for the given state key?
2909  */
2910 static inline bool
2911 any_swizzle(const struct pipe_sampler_view *view)
2912 {
2913    return (view->swizzle_r != PIPE_SWIZZLE_RED ||
2914            view->swizzle_g != PIPE_SWIZZLE_GREEN ||
2915            view->swizzle_b != PIPE_SWIZZLE_BLUE ||
2916            view->swizzle_a != PIPE_SWIZZLE_ALPHA);
2917 }
2918
2919
2920 static img_filter_func
2921 get_img_filter(const struct sp_sampler_view *sp_sview,
2922                const struct pipe_sampler_state *sampler,
2923                unsigned filter, bool gather)
2924 {
2925    switch (sp_sview->base.target) {
2926    case PIPE_BUFFER:
2927    case PIPE_TEXTURE_1D:
2928       if (filter == PIPE_TEX_FILTER_NEAREST)
2929          return img_filter_1d_nearest;
2930       else
2931          return img_filter_1d_linear;
2932       break;
2933    case PIPE_TEXTURE_1D_ARRAY:
2934       if (filter == PIPE_TEX_FILTER_NEAREST)
2935          return img_filter_1d_array_nearest;
2936       else
2937          return img_filter_1d_array_linear;
2938       break;
2939    case PIPE_TEXTURE_2D:
2940    case PIPE_TEXTURE_RECT:
2941       /* Try for fast path:
2942        */
2943       if (!gather && sp_sview->pot2d &&
2944           sampler->wrap_s == sampler->wrap_t &&
2945           sampler->normalized_coords)
2946       {
2947          switch (sampler->wrap_s) {
2948          case PIPE_TEX_WRAP_REPEAT:
2949             switch (filter) {
2950             case PIPE_TEX_FILTER_NEAREST:
2951                return img_filter_2d_nearest_repeat_POT;
2952             case PIPE_TEX_FILTER_LINEAR:
2953                return img_filter_2d_linear_repeat_POT;
2954             default:
2955                break;
2956             }
2957             break;
2958          case PIPE_TEX_WRAP_CLAMP:
2959             switch (filter) {
2960             case PIPE_TEX_FILTER_NEAREST:
2961                return img_filter_2d_nearest_clamp_POT;
2962             default:
2963                break;
2964             }
2965          }
2966       }
2967       /* Otherwise use default versions:
2968        */
2969       if (filter == PIPE_TEX_FILTER_NEAREST)
2970          return img_filter_2d_nearest;
2971       else
2972          return img_filter_2d_linear;
2973       break;
2974    case PIPE_TEXTURE_2D_ARRAY:
2975       if (filter == PIPE_TEX_FILTER_NEAREST)
2976          return img_filter_2d_array_nearest;
2977       else
2978          return img_filter_2d_array_linear;
2979       break;
2980    case PIPE_TEXTURE_CUBE:
2981       if (filter == PIPE_TEX_FILTER_NEAREST)
2982          return img_filter_cube_nearest;
2983       else
2984          return img_filter_cube_linear;
2985       break;
2986    case PIPE_TEXTURE_CUBE_ARRAY:
2987       if (filter == PIPE_TEX_FILTER_NEAREST)
2988          return img_filter_cube_array_nearest;
2989       else
2990          return img_filter_cube_array_linear;
2991       break;
2992    case PIPE_TEXTURE_3D:
2993       if (filter == PIPE_TEX_FILTER_NEAREST)
2994          return img_filter_3d_nearest;
2995       else
2996          return img_filter_3d_linear;
2997       break;
2998    default:
2999       assert(0);
3000       return img_filter_1d_nearest;
3001    }
3002 }
3003
3004 /**
3005  * Get mip filter funcs, and optionally both img min filter and img mag
3006  * filter. Note that both img filter function pointers must be either non-NULL
3007  * or NULL.
3008  */
3009 static void
3010 get_filters(const struct sp_sampler_view *sp_sview,
3011             const struct sp_sampler *sp_samp,
3012             const enum tgsi_sampler_control control,
3013             const struct sp_filter_funcs **funcs,
3014             img_filter_func *min,
3015             img_filter_func *mag)
3016 {
3017    assert(funcs);
3018    if (control == TGSI_SAMPLER_GATHER) {
3019       *funcs = &funcs_nearest;
3020       if (min) {
3021          *min = get_img_filter(sp_sview, &sp_samp->base,
3022                                PIPE_TEX_FILTER_LINEAR, true);
3023       }
3024    } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
3025       *funcs = &funcs_linear_2d_linear_repeat_POT;
3026    } else {
3027       *funcs = sp_samp->filter_funcs;
3028       if (min) {
3029          assert(mag);
3030          *min = get_img_filter(sp_sview, &sp_samp->base,
3031                                sp_samp->min_img_filter, false);
3032          if (sp_samp->min_mag_equal) {
3033             *mag = *min;
3034          } else {
3035             *mag = get_img_filter(sp_sview, &sp_samp->base,
3036                                   sp_samp->base.mag_img_filter, false);
3037          }
3038       }
3039    }
3040 }
3041
3042 static void
3043 sample_mip(const struct sp_sampler_view *sp_sview,
3044            const struct sp_sampler *sp_samp,
3045            const float s[TGSI_QUAD_SIZE],
3046            const float t[TGSI_QUAD_SIZE],
3047            const float p[TGSI_QUAD_SIZE],
3048            const float c0[TGSI_QUAD_SIZE],
3049            const float lod[TGSI_QUAD_SIZE],
3050            const struct filter_args *filt_args,
3051            float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
3052 {
3053    const struct sp_filter_funcs *funcs = NULL;
3054    img_filter_func min_img_filter = NULL;
3055    img_filter_func mag_img_filter = NULL;
3056
3057    get_filters(sp_sview, sp_samp, filt_args->control,
3058                &funcs, &min_img_filter, &mag_img_filter);
3059
3060    funcs->filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
3061                  s, t, p, c0, lod, filt_args, rgba);
3062
3063    if (sp_samp->base.compare_mode != PIPE_TEX_COMPARE_NONE) {
3064       sample_compare(sp_sview, sp_samp, s, t, p, c0,
3065                      lod, filt_args->control, rgba);
3066    }
3067
3068    if (sp_sview->need_swizzle && filt_args->control != TGSI_SAMPLER_GATHER) {
3069       float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3070       memcpy(rgba_temp, rgba, sizeof(rgba_temp));
3071       do_swizzling(&sp_sview->base, rgba_temp, rgba);
3072    }
3073
3074 }
3075
3076
3077 /**
3078  * This function uses cube texture coordinates to choose a face of a cube and
3079  * computes the 2D cube face coordinates. Puts face info into the sampler
3080  * faces[] array.
3081  */
3082 static void
3083 convert_cube(const struct sp_sampler_view *sp_sview,
3084              const struct sp_sampler *sp_samp,
3085              const float s[TGSI_QUAD_SIZE],
3086              const float t[TGSI_QUAD_SIZE],
3087              const float p[TGSI_QUAD_SIZE],
3088              const float c0[TGSI_QUAD_SIZE],
3089              float ssss[TGSI_QUAD_SIZE],
3090              float tttt[TGSI_QUAD_SIZE],
3091              float pppp[TGSI_QUAD_SIZE],
3092              uint faces[TGSI_QUAD_SIZE])
3093 {
3094    unsigned j;
3095
3096    pppp[0] = c0[0];
3097    pppp[1] = c0[1];
3098    pppp[2] = c0[2];
3099    pppp[3] = c0[3];
3100    /*
3101      major axis
3102      direction    target                             sc     tc    ma
3103      ----------   -------------------------------    ---    ---   ---
3104      +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
3105      -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
3106      +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
3107      -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
3108      +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
3109      -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
3110    */
3111
3112    /* Choose the cube face and compute new s/t coords for the 2D face.
3113     *
3114     * Use the same cube face for all four pixels in the quad.
3115     *
3116     * This isn't ideal, but if we want to use a different cube face
3117     * per pixel in the quad, we'd have to also compute the per-face
3118     * LOD here too.  That's because the four post-face-selection
3119     * texcoords are no longer related to each other (they're
3120     * per-face!)  so we can't use subtraction to compute the partial
3121     * deriviates to compute the LOD.  Doing so (near cube edges
3122     * anyway) gives us pretty much random values.
3123     */
3124    {
3125       /* use the average of the four pixel's texcoords to choose the face */
3126       const float rx = 0.25F * (s[0] + s[1] + s[2] + s[3]);
3127       const float ry = 0.25F * (t[0] + t[1] + t[2] + t[3]);
3128       const float rz = 0.25F * (p[0] + p[1] + p[2] + p[3]);
3129       const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
3130
3131       if (arx >= ary && arx >= arz) {
3132          const float sign = (rx >= 0.0F) ? 1.0F : -1.0F;
3133          const uint face = (rx >= 0.0F) ?
3134             PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X;
3135          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3136             const float ima = -0.5F / fabsf(s[j]);
3137             ssss[j] = sign *  p[j] * ima + 0.5F;
3138             tttt[j] =         t[j] * ima + 0.5F;
3139             faces[j] = face;
3140          }
3141       }
3142       else if (ary >= arx && ary >= arz) {
3143          const float sign = (ry >= 0.0F) ? 1.0F : -1.0F;
3144          const uint face = (ry >= 0.0F) ?
3145             PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y;
3146          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3147             const float ima = -0.5F / fabsf(t[j]);
3148             ssss[j] =        -s[j] * ima + 0.5F;
3149             tttt[j] = sign * -p[j] * ima + 0.5F;
3150             faces[j] = face;
3151          }
3152       }
3153       else {
3154          const float sign = (rz >= 0.0F) ? 1.0F : -1.0F;
3155          const uint face = (rz >= 0.0F) ?
3156             PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z;
3157          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3158             const float ima = -0.5F / fabsf(p[j]);
3159             ssss[j] = sign * -s[j] * ima + 0.5F;
3160             tttt[j] =         t[j] * ima + 0.5F;
3161             faces[j] = face;
3162          }
3163       }
3164    }
3165 }
3166
3167
3168 static void
3169 sp_get_dims(const struct sp_sampler_view *sp_sview,
3170             int level,
3171             int dims[4])
3172 {
3173    const struct pipe_sampler_view *view = &sp_sview->base;
3174    const struct pipe_resource *texture = view->texture;
3175
3176    if (view->target == PIPE_BUFFER) {
3177       dims[0] = (view->u.buf.last_element - view->u.buf.first_element) + 1;
3178       /* the other values are undefined, but let's avoid potential valgrind
3179        * warnings.
3180        */
3181       dims[1] = dims[2] = dims[3] = 0;
3182       return;
3183    }
3184
3185    /* undefined according to EXT_gpu_program */
3186    level += view->u.tex.first_level;
3187    if (level > view->u.tex.last_level)
3188       return;
3189
3190    dims[3] = view->u.tex.last_level - view->u.tex.first_level + 1;
3191    dims[0] = u_minify(texture->width0, level);
3192
3193    switch (view->target) {
3194    case PIPE_TEXTURE_1D_ARRAY:
3195       dims[1] = view->u.tex.last_layer - view->u.tex.first_layer + 1;
3196       /* fallthrough */
3197    case PIPE_TEXTURE_1D:
3198       return;
3199    case PIPE_TEXTURE_2D_ARRAY:
3200       dims[2] = view->u.tex.last_layer - view->u.tex.first_layer + 1;
3201       /* fallthrough */
3202    case PIPE_TEXTURE_2D:
3203    case PIPE_TEXTURE_CUBE:
3204    case PIPE_TEXTURE_RECT:
3205       dims[1] = u_minify(texture->height0, level);
3206       return;
3207    case PIPE_TEXTURE_3D:
3208       dims[1] = u_minify(texture->height0, level);
3209       dims[2] = u_minify(texture->depth0, level);
3210       return;
3211    case PIPE_TEXTURE_CUBE_ARRAY:
3212       dims[1] = u_minify(texture->height0, level);
3213       dims[2] = (view->u.tex.last_layer - view->u.tex.first_layer + 1) / 6;
3214       break;
3215    default:
3216       assert(!"unexpected texture target in sp_get_dims()");
3217       return;
3218    }
3219 }
3220
3221 /**
3222  * This function is only used for getting unfiltered texels via the
3223  * TXF opcode.  The GL spec says that out-of-bounds texel fetches
3224  * produce undefined results.  Instead of crashing, lets just clamp
3225  * coords to the texture image size.
3226  */
3227 static void
3228 sp_get_texels(const struct sp_sampler_view *sp_sview,
3229               const int v_i[TGSI_QUAD_SIZE],
3230               const int v_j[TGSI_QUAD_SIZE],
3231               const int v_k[TGSI_QUAD_SIZE],
3232               const int lod[TGSI_QUAD_SIZE],
3233               const int8_t offset[3],
3234               float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
3235 {
3236    union tex_tile_address addr;
3237    const struct pipe_resource *texture = sp_sview->base.texture;
3238    int j, c;
3239    const float *tx;
3240    /* TODO write a better test for LOD */
3241    const unsigned level =
3242       sp_sview->base.target == PIPE_BUFFER ? 0 :
3243       CLAMP(lod[0] + sp_sview->base.u.tex.first_level,
3244             sp_sview->base.u.tex.first_level,
3245             sp_sview->base.u.tex.last_level);
3246    const int width = u_minify(texture->width0, level);
3247    const int height = u_minify(texture->height0, level);
3248    const int depth = u_minify(texture->depth0, level);
3249
3250    addr.value = 0;
3251    addr.bits.level = level;
3252
3253    switch (sp_sview->base.target) {
3254    case PIPE_BUFFER:
3255    case PIPE_TEXTURE_1D:
3256       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3257          const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3258          tx = get_texel_2d_no_border(sp_sview, addr, x, 0);
3259          for (c = 0; c < 4; c++) {
3260             rgba[c][j] = tx[c];
3261          }
3262       }
3263       break;
3264    case PIPE_TEXTURE_1D_ARRAY:
3265       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3266          const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3267          const int y = CLAMP(v_j[j], sp_sview->base.u.tex.first_layer,
3268                              sp_sview->base.u.tex.last_layer);
3269          tx = get_texel_2d_no_border(sp_sview, addr, x, y);
3270          for (c = 0; c < 4; c++) {
3271             rgba[c][j] = tx[c];
3272          }
3273       }
3274       break;
3275    case PIPE_TEXTURE_2D:
3276    case PIPE_TEXTURE_RECT:
3277       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3278          const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3279          const int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
3280          tx = get_texel_2d_no_border(sp_sview, addr, x, y);
3281          for (c = 0; c < 4; c++) {
3282             rgba[c][j] = tx[c];
3283          }
3284       }
3285       break;
3286    case PIPE_TEXTURE_2D_ARRAY:
3287       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3288          const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3289          const int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
3290          const int layer = CLAMP(v_k[j], sp_sview->base.u.tex.first_layer,
3291                                  sp_sview->base.u.tex.last_layer);
3292          tx = get_texel_3d_no_border(sp_sview, addr, x, y, layer);
3293          for (c = 0; c < 4; c++) {
3294             rgba[c][j] = tx[c];
3295          }
3296       }
3297       break;
3298    case PIPE_TEXTURE_3D:
3299       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3300          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3301          int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
3302          int z = CLAMP(v_k[j] + offset[2], 0, depth - 1);
3303          tx = get_texel_3d_no_border(sp_sview, addr, x, y, z);
3304          for (c = 0; c < 4; c++) {
3305             rgba[c][j] = tx[c];
3306          }
3307       }
3308       break;
3309    case PIPE_TEXTURE_CUBE: /* TXF can't work on CUBE according to spec */
3310    default:
3311       assert(!"Unknown or CUBE texture type in TXF processing\n");
3312       break;
3313    }
3314
3315    if (sp_sview->need_swizzle) {
3316       float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3317       memcpy(rgba_temp, rgba, sizeof(rgba_temp));
3318       do_swizzling(&sp_sview->base, rgba_temp, rgba);
3319    }
3320 }
3321
3322
3323 void *
3324 softpipe_create_sampler_state(struct pipe_context *pipe,
3325                               const struct pipe_sampler_state *sampler)
3326 {
3327    struct sp_sampler *samp = CALLOC_STRUCT(sp_sampler);
3328
3329    samp->base = *sampler;
3330
3331    /* Note that (for instance) linear_texcoord_s and
3332     * nearest_texcoord_s may be active at the same time, if the
3333     * sampler min_img_filter differs from its mag_img_filter.
3334     */
3335    if (sampler->normalized_coords) {
3336       samp->linear_texcoord_s = get_linear_wrap( sampler->wrap_s );
3337       samp->linear_texcoord_t = get_linear_wrap( sampler->wrap_t );
3338       samp->linear_texcoord_p = get_linear_wrap( sampler->wrap_r );
3339
3340       samp->nearest_texcoord_s = get_nearest_wrap( sampler->wrap_s );
3341       samp->nearest_texcoord_t = get_nearest_wrap( sampler->wrap_t );
3342       samp->nearest_texcoord_p = get_nearest_wrap( sampler->wrap_r );
3343    }
3344    else {
3345       samp->linear_texcoord_s = get_linear_unorm_wrap( sampler->wrap_s );
3346       samp->linear_texcoord_t = get_linear_unorm_wrap( sampler->wrap_t );
3347       samp->linear_texcoord_p = get_linear_unorm_wrap( sampler->wrap_r );
3348
3349       samp->nearest_texcoord_s = get_nearest_unorm_wrap( sampler->wrap_s );
3350       samp->nearest_texcoord_t = get_nearest_unorm_wrap( sampler->wrap_t );
3351       samp->nearest_texcoord_p = get_nearest_unorm_wrap( sampler->wrap_r );
3352    }
3353
3354    samp->min_img_filter = sampler->min_img_filter;
3355
3356    switch (sampler->min_mip_filter) {
3357    case PIPE_TEX_MIPFILTER_NONE:
3358       if (sampler->min_img_filter == sampler->mag_img_filter)
3359          samp->filter_funcs = &funcs_none_no_filter_select;
3360       else
3361          samp->filter_funcs = &funcs_none;
3362       break;
3363
3364    case PIPE_TEX_MIPFILTER_NEAREST:
3365       samp->filter_funcs = &funcs_nearest;
3366       break;
3367
3368    case PIPE_TEX_MIPFILTER_LINEAR:
3369       if (sampler->min_img_filter == sampler->mag_img_filter &&
3370           sampler->normalized_coords &&
3371           sampler->wrap_s == PIPE_TEX_WRAP_REPEAT &&
3372           sampler->wrap_t == PIPE_TEX_WRAP_REPEAT &&
3373           sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
3374           sampler->max_anisotropy <= 1) {
3375          samp->min_mag_equal_repeat_linear = TRUE;
3376       }
3377       samp->filter_funcs = &funcs_linear;
3378
3379       /* Anisotropic filtering extension. */
3380       if (sampler->max_anisotropy > 1) {
3381          samp->filter_funcs = &funcs_linear_aniso;
3382
3383          /* Override min_img_filter:
3384           * min_img_filter needs to be set to NEAREST since we need to access
3385           * each texture pixel as it is and weight it later; using linear
3386           * filters will have incorrect results.
3387           * By setting the filter to NEAREST here, we can avoid calling the
3388           * generic img_filter_2d_nearest in the anisotropic filter function,
3389           * making it possible to use one of the accelerated implementations
3390           */
3391          samp->min_img_filter = PIPE_TEX_FILTER_NEAREST;
3392
3393          /* on first access create the lookup table containing the filter weights. */
3394         if (!weightLut) {
3395            create_filter_table();
3396         }
3397       }
3398       break;
3399    }
3400    if (samp->min_img_filter == sampler->mag_img_filter) {
3401       samp->min_mag_equal = TRUE;
3402    }
3403
3404    return (void *)samp;
3405 }
3406
3407
3408 compute_lambda_func
3409 softpipe_get_lambda_func(const struct pipe_sampler_view *view, unsigned shader)
3410 {
3411    if (shader != PIPE_SHADER_FRAGMENT)
3412       return compute_lambda_vert;
3413
3414    switch (view->target) {
3415    case PIPE_BUFFER:
3416    case PIPE_TEXTURE_1D:
3417    case PIPE_TEXTURE_1D_ARRAY:
3418       return compute_lambda_1d;
3419    case PIPE_TEXTURE_2D:
3420    case PIPE_TEXTURE_2D_ARRAY:
3421    case PIPE_TEXTURE_RECT:
3422    case PIPE_TEXTURE_CUBE:
3423    case PIPE_TEXTURE_CUBE_ARRAY:
3424       return compute_lambda_2d;
3425    case PIPE_TEXTURE_3D:
3426       return compute_lambda_3d;
3427    default:
3428       assert(0);
3429       return compute_lambda_1d;
3430    }
3431 }
3432
3433
3434 struct pipe_sampler_view *
3435 softpipe_create_sampler_view(struct pipe_context *pipe,
3436                              struct pipe_resource *resource,
3437                              const struct pipe_sampler_view *templ)
3438 {
3439    struct sp_sampler_view *sview = CALLOC_STRUCT(sp_sampler_view);
3440    const struct softpipe_resource *spr = (struct softpipe_resource *)resource;
3441
3442    if (sview) {
3443       struct pipe_sampler_view *view = &sview->base;
3444       *view = *templ;
3445       view->reference.count = 1;
3446       view->texture = NULL;
3447       pipe_resource_reference(&view->texture, resource);
3448       view->context = pipe;
3449
3450 #ifdef DEBUG
3451      /*
3452       * This is possibly too lenient, but the primary reason is just
3453       * to catch state trackers which forget to initialize this, so
3454       * it only catches clearly impossible view targets.
3455       */
3456       if (view->target != resource->target) {
3457          if (view->target == PIPE_TEXTURE_1D)
3458             assert(resource->target == PIPE_TEXTURE_1D_ARRAY);
3459          else if (view->target == PIPE_TEXTURE_1D_ARRAY)
3460             assert(resource->target == PIPE_TEXTURE_1D);
3461          else if (view->target == PIPE_TEXTURE_2D)
3462             assert(resource->target == PIPE_TEXTURE_2D_ARRAY ||
3463                    resource->target == PIPE_TEXTURE_CUBE ||
3464                    resource->target == PIPE_TEXTURE_CUBE_ARRAY);
3465          else if (view->target == PIPE_TEXTURE_2D_ARRAY)
3466             assert(resource->target == PIPE_TEXTURE_2D ||
3467                    resource->target == PIPE_TEXTURE_CUBE ||
3468                    resource->target == PIPE_TEXTURE_CUBE_ARRAY);
3469          else if (view->target == PIPE_TEXTURE_CUBE)
3470             assert(resource->target == PIPE_TEXTURE_CUBE_ARRAY ||
3471                    resource->target == PIPE_TEXTURE_2D_ARRAY);
3472          else if (view->target == PIPE_TEXTURE_CUBE_ARRAY)
3473             assert(resource->target == PIPE_TEXTURE_CUBE ||
3474                    resource->target == PIPE_TEXTURE_2D_ARRAY);
3475          else
3476             assert(0);
3477       }
3478 #endif
3479
3480       if (any_swizzle(view)) {
3481          sview->need_swizzle = TRUE;
3482       }
3483
3484       sview->need_cube_convert = (view->target == PIPE_TEXTURE_CUBE ||
3485                                   view->target == PIPE_TEXTURE_CUBE_ARRAY);
3486       sview->pot2d = spr->pot &&
3487                      (view->target == PIPE_TEXTURE_2D ||
3488                       view->target == PIPE_TEXTURE_RECT);
3489
3490       sview->xpot = util_logbase2( resource->width0 );
3491       sview->ypot = util_logbase2( resource->height0 );
3492    }
3493
3494    return (struct pipe_sampler_view *) sview;
3495 }
3496
3497
/**
 * Reinterpret a const tgsi_sampler pointer as a const sp_tgsi_sampler
 * pointer.  This is a plain pointer cast; no checking is performed.
 */
static inline const struct sp_tgsi_sampler *
sp_tgsi_sampler_cast_c(const struct tgsi_sampler *sampler)
{
   const struct sp_tgsi_sampler *sp_sampler =
      (const struct sp_tgsi_sampler *) sampler;

   return sp_sampler;
}
3503
3504
3505 static void
3506 sp_tgsi_get_dims(struct tgsi_sampler *tgsi_sampler,
3507                  const unsigned sview_index,
3508                  int level, int dims[4])
3509 {
3510    const struct sp_tgsi_sampler *sp_samp =
3511       sp_tgsi_sampler_cast_c(tgsi_sampler);
3512
3513    assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
3514    /* always have a view here but texture is NULL if no sampler view was set. */
3515    if (!sp_samp->sp_sview[sview_index].base.texture) {
3516       dims[0] = dims[1] = dims[2] = dims[3] = 0;
3517       return;
3518    }
3519    sp_get_dims(&sp_samp->sp_sview[sview_index], level, dims);
3520 }
3521
3522
3523 static void
3524 sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
3525                     const unsigned sview_index,
3526                     const unsigned sampler_index,
3527                     const float s[TGSI_QUAD_SIZE],
3528                     const float t[TGSI_QUAD_SIZE],
3529                     const float p[TGSI_QUAD_SIZE],
3530                     const float c0[TGSI_QUAD_SIZE],
3531                     const float lod[TGSI_QUAD_SIZE],
3532                     float derivs[3][2][TGSI_QUAD_SIZE],
3533                     const int8_t offset[3],
3534                     enum tgsi_sampler_control control,
3535                     float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
3536 {
3537    const struct sp_tgsi_sampler *sp_tgsi_samp =
3538       sp_tgsi_sampler_cast_c(tgsi_sampler);
3539    const struct sp_sampler_view *sp_sview;
3540    const struct sp_sampler *sp_samp;
3541    struct filter_args filt_args;
3542
3543    assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
3544    assert(sampler_index < PIPE_MAX_SAMPLERS);
3545    assert(sp_tgsi_samp->sp_sampler[sampler_index]);
3546
3547    sp_sview = &sp_tgsi_samp->sp_sview[sview_index];
3548    sp_samp = sp_tgsi_samp->sp_sampler[sampler_index];
3549    /* always have a view here but texture is NULL if no sampler view was set. */
3550    if (!sp_sview->base.texture) {
3551       int i, j;
3552       for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3553          for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3554             rgba[j][i] = 0.0f;
3555          }
3556       }
3557       return;
3558    }
3559
3560    filt_args.control = control;
3561    filt_args.offset = offset;
3562
3563    if (sp_sview->need_cube_convert) {
3564       float cs[TGSI_QUAD_SIZE];
3565       float ct[TGSI_QUAD_SIZE];
3566       float cp[TGSI_QUAD_SIZE];
3567       uint faces[TGSI_QUAD_SIZE];
3568
3569       convert_cube(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp, faces);
3570
3571       filt_args.faces = faces;
3572       sample_mip(sp_sview, sp_samp, cs, ct, cp, c0, lod, &filt_args, rgba);
3573    } else {
3574       static const uint zero_faces[TGSI_QUAD_SIZE] = {0, 0, 0, 0};
3575
3576       filt_args.faces = zero_faces;
3577       sample_mip(sp_sview, sp_samp, s, t, p, c0, lod, &filt_args, rgba);
3578    }
3579 }
3580
3581 static void
3582 sp_tgsi_query_lod(const struct tgsi_sampler *tgsi_sampler,
3583                   const unsigned sview_index,
3584                   const unsigned sampler_index,
3585                   const float s[TGSI_QUAD_SIZE],
3586                   const float t[TGSI_QUAD_SIZE],
3587                   const float p[TGSI_QUAD_SIZE],
3588                   const float c0[TGSI_QUAD_SIZE],
3589                   const enum tgsi_sampler_control control,
3590                   float mipmap[TGSI_QUAD_SIZE],
3591                   float lod[TGSI_QUAD_SIZE])
3592 {
3593    static const float lod_in[TGSI_QUAD_SIZE] = { 0.0, 0.0, 0.0, 0.0 };
3594
3595    const struct sp_tgsi_sampler *sp_tgsi_samp =
3596       sp_tgsi_sampler_cast_c(tgsi_sampler);
3597    const struct sp_sampler_view *sp_sview;
3598    const struct sp_sampler *sp_samp;
3599    const struct sp_filter_funcs *funcs;
3600    int i;
3601
3602    assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
3603    assert(sampler_index < PIPE_MAX_SAMPLERS);
3604    assert(sp_tgsi_samp->sp_sampler[sampler_index]);
3605
3606    sp_sview = &sp_tgsi_samp->sp_sview[sview_index];
3607    sp_samp = sp_tgsi_samp->sp_sampler[sampler_index];
3608    /* always have a view here but texture is NULL if no sampler view was
3609     * set. */
3610    if (!sp_sview->base.texture) {
3611       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3612          mipmap[i] = 0.0f;
3613          lod[i] = 0.0f;
3614       }
3615       return;
3616    }
3617
3618    if (sp_sview->need_cube_convert) {
3619       float cs[TGSI_QUAD_SIZE];
3620       float ct[TGSI_QUAD_SIZE];
3621       float cp[TGSI_QUAD_SIZE];
3622       uint unused_faces[TGSI_QUAD_SIZE];
3623
3624       convert_cube(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp, unused_faces);
3625       compute_lambda_lod_unclamped(sp_sview, sp_samp,
3626                                    cs, ct, cp, lod_in, control, lod);
3627    } else {
3628       compute_lambda_lod_unclamped(sp_sview, sp_samp,
3629                                    s, t, p, lod_in, control, lod);
3630    }
3631
3632    get_filters(sp_sview, sp_samp, control, &funcs, NULL, NULL);
3633    funcs->relative_level(sp_sview, sp_samp, lod, mipmap);
3634 }
3635
3636 static void
3637 sp_tgsi_get_texel(struct tgsi_sampler *tgsi_sampler,
3638                   const unsigned sview_index,
3639                   const int i[TGSI_QUAD_SIZE],
3640                   const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],
3641                   const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
3642                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
3643 {
3644    const struct sp_tgsi_sampler *sp_samp =
3645       sp_tgsi_sampler_cast_c(tgsi_sampler);
3646
3647    assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
3648    /* always have a view here but texture is NULL if no sampler view was set. */
3649    if (!sp_samp->sp_sview[sview_index].base.texture) {
3650       int i, j;
3651       for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3652          for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3653             rgba[j][i] = 0.0f;
3654          }
3655       }
3656       return;
3657    }
3658    sp_get_texels(&sp_samp->sp_sview[sview_index], i, j, k, lod, offset, rgba);
3659 }
3660
3661
3662 struct sp_tgsi_sampler *
3663 sp_create_tgsi_sampler(void)
3664 {
3665    struct sp_tgsi_sampler *samp = CALLOC_STRUCT(sp_tgsi_sampler);
3666    if (!samp)
3667       return NULL;
3668
3669    samp->base.get_dims = sp_tgsi_get_dims;
3670    samp->base.get_samples = sp_tgsi_get_samples;
3671    samp->base.get_texel = sp_tgsi_get_texel;
3672    samp->base.query_lod = sp_tgsi_query_lod;
3673
3674    return samp;
3675 }